home *** CD-ROM | disk | FTP | other *** search
/ PC World 2006 February / PCWorld_2006-02_cd.bin / software / vyzkuste / triky / triky.exe / httrack-3.33.exe / {app} / src / htsparse.c < prev    next >
C/C++ Source or Header  |  2005-01-08  |  184KB  |  4,178 lines

  1. /* ------------------------------------------------------------ */
  2. /*
  3. HTTrack Website Copier, Offline Browser for Windows and Unix
  4. Copyright (C) Xavier Roche and other contributors
  5.  
  6. This program is free software; you can redistribute it and/or
  7. modify it under the terms of the GNU General Public License
  8. as published by the Free Software Foundation; either version 2
  9. of the License, or any later version.
  10.  
  11. This program is distributed in the hope that it will be useful,
  12. but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14. GNU General Public License for more details.
  15.  
  16. You should have received a copy of the GNU General Public License
  17. along with this program; if not, write to the Free Software
  18. Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19.  
  20.  
  21. Important notes:
  22.  
  23. - We hereby ask people using this source NOT to use it in purpose of grabbing
  24. emails addresses, or collecting any other private information on persons.
  25. This would disgrace our work, and spoil the many hours we spent on it.
  26.  
  27.  
  28. Please visit our Website: http://www.httrack.com
  29. */
  30.  
  31.  
  32. /* ------------------------------------------------------------ */
  33. /* File: htsparse.c parser                                      */
  34. /*       html/javascript/css parser                             */
  35. /*       and other parser routines                              */
  36. /* Author: Xavier Roche                                         */
  37. /* ------------------------------------------------------------ */
  38.  
  39.  
  40. /* Internal engine bytecode */
  41. #define HTS_INTERNAL_BYTECODE
  42.  
  43. #ifndef  _WIN32_WCE
  44. #include <fcntl.h>
  45. #endif
  46. #include <ctype.h>
  47.  
  48. /* File defs */
  49. #include "htscore.h"
  50.  
  51. /* specific definitions */
  52. #include "htsbase.h"
  53. #include "htsnet.h"
  54. #include "htsbauth.h"
  55. #include "htsmd5.h"
  56. #include "htsindex.h"
  57.  
  58. /* external modules */
  59. #include "htsmodules.h"
  60.  
  61. // htswrap_add
  62. #include "htswrap.h"
  63.  
  64. // parser
  65. #include "htsparse.h"
  66.  
  67.  
  68. // Shorthand accessors: adr/fil/sav of the current link (liens[ptr]), of its parent (liens[liens[ptr]->precedent]), and "relative" variants that pick the parent's fields when parent_relative is non-zero. They expand in place, so 'liens', 'ptr' and 'parent_relative' must be in scope.
  69. #define urladr   (liens[ptr]->adr)
  70. #define urlfil   (liens[ptr]->fil)
  71. #define savename (liens[ptr]->sav)
  72. #define parenturladr   (liens[liens[ptr]->precedent]->adr)
  73. #define parenturlfil   (liens[liens[ptr]->precedent]->fil)
  74. #define parentsavename (liens[liens[ptr]->precedent]->sav)
  75. #define relativeurladr   ((!parent_relative)?urladr:parenturladr)
  76. #define relativeurlfil   ((!parent_relative)?urlfil:parenturlfil)
  77. #define relativesavename ((!parent_relative)?savename:parentsavename)
  78. // test_flush: when opt->flush is set, fflush() opt->log and opt->errlog (each only if non-NULL). Expands to a bare 'if' statement, not wrapped in do { } while(0).
  79. #define test_flush if (opt->flush) { if (opt->log) { fflush(opt->log); } if (opt->errlog) { fflush(opt->errlog);  } }
  80.  
  81. // XH_uninit: placeholder expanded in the out-of-memory paths of HT_ADD_CHK and HT_ADD_START; it does nothing
  82. #define XH_uninit do {} while(0)
  83.  
  84. // optimized version, which makes it possible to leave unmodified html files untouched (update): the output is accumulated in a growable memory buffer (ht_buff, ht_len, ht_size)
  85. #define REALLOC_SIZE 8192 /* slack, in bytes, added each time ht_buff is allocated or grown */
  86. #define HT_ADD_CHK(A) if (((int) (A)+ht_len+1) >= ht_size) { /* reserve room for A more bytes plus a terminating NUL, growing ht_buff if needed */ \
  87.   ht_size=(A)+ht_len+REALLOC_SIZE; \
  88.   ht_buff=(char*) realloct(ht_buff,ht_size); \
  89.   if (ht_buff==NULL) { /* out of memory: report and terminate the process */ \
  90.   printf("PANIC! : Not enough memory [%d]\n",__LINE__); \
  91.   XH_uninit; \
  92.   abortLogFmt("not enough memory for current html document in HT_ADD_CHK : realloct(%d) failed" _ ht_size); \
  93.   exit(1); \
  94.   } \
  95. } \
  96.   ht_len+=A; /* not part of the 'if': ht_len always advances by A; callers save the old ht_len first and copy their data there */
  97. #define HT_ADD_ADR /* append the source bytes in [lastsaved, adr) to ht_buff, NUL-terminate, and set lastsaved=adr; active only when (opt->getmode & 1) && (ptr>0) */ \
  98.   if ((opt->getmode & 1) && (ptr>0)) { \
  99.   int i=((int) (adr - lastsaved)),j=ht_len; HT_ADD_CHK(i) \
  100.   memcpy(ht_buff+j, lastsaved, i); \
  101.   ht_buff[j+i]='\0'; \
  102.   lastsaved=adr; \
  103.   }
  104. #define HT_ADD(A) /* append the NUL-terminated string A to ht_buff under the same condition; A is evaluated twice (strlen and memcpy) */ \
  105.   if ((opt->getmode & 1) && (ptr>0)) { \
  106.   int i_=strlen(A),j_=ht_len; \
  107.   if (i_) { \
  108.   HT_ADD_CHK(i_) \
  109.   memcpy(ht_buff+j_, A, i_); \
  110.   ht_buff[j_+i_]='\0'; \
  111.   } }
  112. #define HT_ADD_HTMLESCAPED(A) /* like HT_ADD, but A is first passed through escape_for_html_print() into a local buffer of HTS_URLMAXSIZE*2 bytes */ \
  113.   if ((opt->getmode & 1) && (ptr>0)) { \
  114.     int i_, j_; \
  115.     char BIGSTK tempo_[HTS_URLMAXSIZE*2]; \
  116.     escape_for_html_print(A, tempo_); \
  117.     i_=strlen(tempo_); \
  118.     j_=ht_len; \
  119.     if (i_) { \
  120.     HT_ADD_CHK(i_) \
  121.     memcpy(ht_buff+j_, tempo_, i_); \
  122.     ht_buff[j_+i_]='\0'; \
  123.   } }
  124. #define HT_ADD_HTMLESCAPED_FULL(A) /* same as HT_ADD_HTMLESCAPED, but uses escape_for_html_print_full() */ \
  125.   if ((opt->getmode & 1) && (ptr>0)) { \
  126.     int i_, j_; \
  127.     char BIGSTK tempo_[HTS_URLMAXSIZE*2]; \
  128.     escape_for_html_print_full(A, tempo_); \
  129.     i_=strlen(tempo_); \
  130.     j_=ht_len; \
  131.     if (i_) { \
  132.     HT_ADD_CHK(i_) \
  133.     memcpy(ht_buff+j_, tempo_, i_); \
  134.     ht_buff[j_+i_]='\0'; \
  135.   } }
  136. #define HT_ADD_START /* declares ht_size, ht_len and ht_buff in the enclosing scope; ht_buff is allocated only when (opt->getmode & 1) && (ptr>0), otherwise it stays NULL */ \
  137.   int ht_size=(int)(r->size*5)/4+REALLOC_SIZE; /* initial capacity: 5/4 of the document size plus slack */ \
  138.   int ht_len=0; \
  139.   char* ht_buff=NULL; \
  140.   if ((opt->getmode & 1) && (ptr>0)) { \
  141.   ht_buff=(char*) malloct(ht_size); \
  142.   if (ht_buff==NULL) { /* out of memory: report and terminate the process */ \
  143.   printf("PANIC! : Not enough memory [%d]\n",__LINE__); \
  144.   XH_uninit; \
  145.   abortLogFmt("not enough memory for current html document in HT_ADD_START : malloct(%d) failed" _ ht_size); \
  146.   exit(1); \
  147.   } \
  148.   ht_buff[0]='\0'; \
  149.   }
  150. #define HT_ADD_END { /* finalize the buffered document: write ht_buff to savename unless an identical copy (same size, same cached MD5) is already on disk, store the digest in the cache, then free ht_buff */ \
  151.   int ok=0;\
  152.   if (ht_buff) { \
  153.   char digest[32+2];\
  154.   digest[0]='\0';\
  155.   domd5mem(ht_buff,ht_len,digest,1);\
  156.   if (fsize(fconv(savename))==ht_len) { /* existing file has the same size: compare digests */ \
  157.   int mlen = 0;\
  158.   char* mbuff;\
  159.   cache_readdata(cache,"//[HTML-MD5]//",savename,&mbuff,&mlen);\
  160.   if (mlen) mbuff[mlen]='\0';\
  161.   if ((mlen == 32) && (strcmp(((mbuff!=NULL)?mbuff:""),digest)==0)) {\
  162.   ok=1;\
  163.   if ( (opt->debug>1) && (opt->log!=NULL) ) {\
  164.   fspc(opt->log,"debug"); fprintf(opt->log,"File not re-written (md5): %s"LF,savename);\
  165.   test_flush;\
  166.   }\
  167.   } else {\
  168.   ok=0;\
  169.   } \
  170.   }\
  171.   if (!ok) { /* no match: (re)create the file and write the buffer */ \
  172.   fp=filecreate(savename); \
  173.   if (fp) { \
  174.   if (ht_len>0) {\
  175.   if ((INTsys)fwrite(ht_buff,1,ht_len,fp) != ht_len) { /* short write */ \
  176.   int fcheck;\
  177.   if ((fcheck=check_fatal_io_errno())) {\
  178.   opt->state.exit_xh=-1;\
  179.   }\
  180.   if (opt->errlog) {   \
  181.   fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unable to write HTML file %s: %s"LF, savename, strerror(errno));\
  182.   if (fcheck) {\
  183.   fspc(opt->errlog,"error");\
  184.   fprintf(opt->errlog,"* * Fatal write error, giving up"LF);\
  185.   }\
  186.   test_flush;\
  187.   }\
  188.   }\
  189.   }\
  190.   fclose(fp); fp=NULL; \
  191.   if (strnotempty(r->lastmodified)) \
  192.   set_filetime_rfc822(savename,r->lastmodified); \
  193.   } else { /* filecreate() failed */ \
  194.   int fcheck;\
  195.   if ((fcheck=check_fatal_io_errno())) {\
  196.   opt->state.exit_xh=-1;\
  197.   }\
  198.   if (opt->errlog) { \
  199.   fspc(opt->errlog,"error");\
  200.   fprintf(opt->errlog,"Unable to save file %s : %s"LF, savename, strerror(errno));\
  201.   if (fcheck) {\
  202.   fspc(opt->errlog,"error");\
  203.   fprintf(opt->errlog,"* * Fatal write error, giving up"LF);\
  204.   }\
  205.   test_flush;\
  206.   }\
  207.   }\
  208.   } else { /* identical content: the file is not rewritten, only filenote() is called */ \
  209.   filenote(savename,NULL); \
  210.   }\
  211.   if (cache->ndx) /* record the new digest for the next update */ \
  212.   cache_writedata(cache->ndx,cache->dat,"//[HTML-MD5]//",savename,digest,(int)strlen(digest));\
  213.   } \
  214.   freet(ht_buff); ht_buff=NULL; \
  215. }
  216. #define HT_ADD_FOP /* expands to nothing */
  217. // HT_INDEX_END: unless makeindex_done is already set, finish the generated index: print template_footer to makeindex_fp (with a meta-refresh to makeindex_firstlink when exactly one link was indexed), close the file and call usercommand() on index.html; makeindex_done is set to 1 in every case
  218. // COPY IN HTSCORE.C
  219. #define HT_INDEX_END do { \
  220.   if (!makeindex_done) { \
  221.   if (makeindex_fp) { \
  222.   char BIGSTK tempo[1024]; \
  223.   if (makeindex_links == 1) { \
  224.   sprintf(tempo,"<meta HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=%s\">"CRLF,makeindex_firstlink); /* NOTE(review): unbounded sprintf into the 1024-byte tempo; confirm makeindex_firstlink can never exceed it */ \
  225.   } else \
  226.   tempo[0]='\0'; \
  227.   fprintf(makeindex_fp,template_footer, \
  228.   "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->", \
  229.   tempo \
  230.   ); \
  231.   fflush(makeindex_fp); \
  232.   fclose(makeindex_fp);  /* do not forget this, or you are in for a sleepless night */  \
  233.   makeindex_fp=NULL; \
  234.   usercommand(opt,0,NULL,fconcat(opt->path_html,"index.html"),"primary","primary");  \
  235.   } \
  236.   } \
  237.   makeindex_done=1;    /* ok, done */  \
  238. } while(0)
  239.  
  240. // Recording a link:
  241. // compute the required size: size of the 3 strings to store (the original note says "forced even, plus 2 safety bytes"; the code rounds each length down to a multiple of HTS_ALIGN and adds 2*HTS_ALIGN)
  242. // then check that enough room is left in the buffer - otherwise allocate another one
  243. // finally write at the current buffer address, which is advanced; the available size is then decreased by the same amount
  244. // codebase: if non-empty and a .class file is being stored, it is recorded as the primary path for classes
  245. // FA,FF: former_adr and former_fil, the original link
  246. #if HTS_HASH
  247. #define liens_record_sav_len(A) /* expands to nothing when HTS_HASH is set */
  248. #else
  249. #define liens_record_sav_len(A) (A)->sav_len=strlen((A)->sav)
  250. #endif
  251.  
  252. // COPY OF HTSCORE.C
  253. #define liens_record(A,F,S,FA,FF) { \
  254.   int notecode=0; \
  255.   int lienurl_len=((sizeof(lien_url)+HTS_ALIGN-1)/HTS_ALIGN)*HTS_ALIGN,\
  256.   adr_len=strlen(A),\
  257.   fil_len=strlen(F),\
  258.   sav_len=strlen(S),\
  259.   cod_len=0,\
  260.   former_adr_len=strlen(FA),\
  261.   former_fil_len=strlen(FF); \
  262.   if (former_adr_len>0) { /* former address/file are stored only when FA is non-empty */ \
  263.   former_adr_len=(former_adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
  264.   former_fil_len=(former_fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
  265.   } else former_adr_len=former_fil_len=0;\
  266.   if (strlen(F)>6) if (strnotempty(codebase)) if (strfield(F+strlen(F)-6,".class")) { notecode=1; \
  267.   cod_len=strlen(codebase); cod_len=(cod_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; } \
  268.   adr_len=(adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; fil_len=(fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; sav_len=(sav_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
  269.   if ((int) lien_size < (int) (adr_len+fil_len+sav_len+cod_len+former_adr_len+former_fil_len+lienurl_len)) { /* not enough room left: start a new zeroed block of add_tab_alloc bytes */ \
  270.   lien_buffer=(char*) ((void*) calloct(add_tab_alloc,1)); \
  271.   lien_size=add_tab_alloc; \
  272.   if (lien_buffer!=NULL) { \
  273.   liens[lien_tot]=(lien_url*) (void*) lien_buffer; lien_buffer+=lienurl_len; lien_size-=lienurl_len; \
  274.   liens[lien_tot]->firstblock=1; \
  275.   } \
  276.   } else { /* carve the record out of the current block */ \
  277.   liens[lien_tot]=(lien_url*) (void*) lien_buffer; lien_buffer+=lienurl_len; lien_size-=lienurl_len; \
  278.   liens[lien_tot]->firstblock=0; \
  279.   } \
  280.   if (liens[lien_tot]!=NULL) { /* NOTE(review): if calloct() failed above, liens[lien_tot] was not assigned by this macro, so this test relies on its prior value - confirm it is NULL */ \
  281.   liens[lien_tot]->adr=lien_buffer; lien_buffer+=adr_len; lien_size-=adr_len; \
  282.   liens[lien_tot]->fil=lien_buffer; lien_buffer+=fil_len; lien_size-=fil_len; \
  283.   liens[lien_tot]->sav=lien_buffer; lien_buffer+=sav_len; lien_size-=sav_len; \
  284.   liens[lien_tot]->cod=NULL; \
  285.   if (notecode) { liens[lien_tot]->cod=lien_buffer; lien_buffer+=cod_len; lien_size-=cod_len; strcpybuff(liens[lien_tot]->cod,codebase); } \
  286.   if (former_adr_len>0) {\
  287.   liens[lien_tot]->former_adr=lien_buffer; lien_buffer+=former_adr_len; lien_size-=former_adr_len; \
  288.   liens[lien_tot]->former_fil=lien_buffer; lien_buffer+=former_fil_len; lien_size-=former_fil_len; \
  289.   strcpybuff(liens[lien_tot]->former_adr,FA); \
  290.   strcpybuff(liens[lien_tot]->former_fil,FF); \
  291.   }\
  292.   strcpybuff(liens[lien_tot]->adr,A); \
  293.   strcpybuff(liens[lien_tot]->fil,F); \
  294.   strcpybuff(liens[lien_tot]->sav,S); \
  295.   liens_record_sav_len(liens[lien_tot]); \
  296.   hash_write(hashptr,lien_tot,opt->urlhack);  \
  297.   } \
  298. }
  299. // ENGINE_LOAD_CONTEXT: declare local variables initialised from the 'str' and 'stre' structures; values reached through the pointer members ending in '_' are copied, so later changes to the locals reach the caller only through ENGINE_SAVE_CONTEXT()
  300. #define ENGINE_LOAD_CONTEXT() \
  301.   lien_url** liens = (lien_url**) str->liens; \
  302.   httrackp* opt = (httrackp*) str->opt; \
  303.   lien_back* back = (lien_back*) str->back; \
  304.   cache_back* cache = (cache_back*) str->cache; \
  305.   hash_struct* hashptr = (hash_struct*) str->hashptr; \
  306.   int back_max = str->back_max; \
  307.   int numero_passe = str->numero_passe; \
  308.   int add_tab_alloc = str->add_tab_alloc; \
  309.   /* */ \
  310.   int lien_tot = * ( (int*) (str->lien_tot_) ); \
  311.   int ptr = * ( (int*) (str->ptr_) ); \
  312.   int lien_size = * ( (int*) (str->lien_size_) ); \
  313.   char* lien_buffer = * ( (char**) (str->lien_buffer_) ); \
  314.   /* */ \
  315.   /* */ \
  316.   htsblk* r = stre->r_; \
  317.   hash_struct* hash = stre->hash_; \
  318.   int lien_max = *stre->lien_max_; \
  319.   /* */ \
  320.   int error = * stre->error_; \
  321.   int store_errpage = * stre->store_errpage_; \
  322.   char* codebase = stre->codebase; \
  323.   char* base = stre->base; \
  324.   /* */ \
  325.   int makeindex_done = *stre->makeindex_done_; \
  326.   FILE* makeindex_fp = *stre->makeindex_fp_; \
  327.   int makeindex_links = *stre->makeindex_links_; \
  328.   char* makeindex_firstlink = stre->makeindex_firstlink_; \
  329.   /* */ \
  330.   char *template_header = stre->template_header_; \
  331.   char *template_body = stre->template_body_; \
  332.   char *template_footer = stre->template_footer_; \
  333.   /* */ \
  334.   LLint stat_fragment = *stre->stat_fragment_; \
  335.   TStamp makestat_time = stre->makestat_time; \
  336.   FILE* makestat_fp = stre->makestat_fp
  337. // ENGINE_SAVE_CONTEXT: store the local copies of the listed variables back through the corresponding pointers held in 'str' and 'stre'
  338. #define ENGINE_SAVE_CONTEXT() \
  339.   /* Apply changes */ \
  340.   * ( (int*) (str->lien_tot_) ) = lien_tot; \
  341.   * ( (int*) (str->ptr_) ) = ptr; \
  342.   * ( (int*) (str->lien_size_) ) = lien_size; \
  343.   * ( (char**) (str->lien_buffer_) ) = lien_buffer; \
  344.   /* */ \
  345.   * stre->error_ = error; \
  346.   * stre->store_errpage_ = store_errpage; \
  347.   * stre->lien_max_ = lien_max; \
  348.   /* */ \
  349.   *stre->makeindex_done_ = makeindex_done; \
  350.   *stre->makeindex_fp_ = makeindex_fp; \
  351.   *stre->makeindex_links_ = makeindex_links; \
  352.   /* */ \
  353.   *stre->stat_fragment_ = stat_fragment
  354. // Shortcuts into 'opt': the object pointed to by opt->filters.filters, the opt->filters.filptr member, and opt->robotsptr cast to robots_wizard*
  355. #define _FILTERS     (*opt->filters.filters)
  356. #define _FILTERS_PTR (opt->filters.filptr)
  357. #define _ROBOTS      ((robots_wizard*)opt->robotsptr)
  358.  
  359. /* When 'inscript' is set, feed the current *adr character to the script automaton: the next state is read from inscript_state[current state][character], falling back to the INSCRIPT_DEFAULT column when that entry is negative; the result is range-checked with assertf() before being stored in inscript_state_pos */
  360. #define AUTOMATE_LOOKUP_CURRENT_ADR() do { \
  361.   if (inscript) { \
  362.   int new_state_pos; \
  363.   new_state_pos=inscript_state[inscript_state_pos][(unsigned char)*adr]; \
  364.   if (new_state_pos < 0) { \
  365.   new_state_pos=inscript_state[inscript_state_pos][INSCRIPT_DEFAULT]; \
  366.   } \
  367.   assertf(new_state_pos >= 0); \
  368.   assertf(new_state_pos*sizeof(inscript_state[0]) < sizeof(inscript_state)); \
  369.   inscript_state_pos=new_state_pos; \
  370.   } \
  371. } while(0)  
  372.  
  373. /* Advance 'adr' by 'steps' characters, one at a time, applying the script automaton to each new current character; 'steps' is evaluated once */
  374. #define INCREMENT_CURRENT_ADR(steps) do { \
  375.   int steps__ = (steps); \
  376.   while(steps__ > 0) { \
  377.   adr++; \
  378.   AUTOMATE_LOOKUP_CURRENT_ADR(); \
  379.   steps__ --; \
  380.   } \
  381. } while(0)
  382.  
  383.  
  384. /* Main parser */
  385. int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
  386.   /* Load engine variables */
  387.   ENGINE_LOAD_CONTEXT();
  388.  
  389. #if HTS_ANALYSTE
  390.   {
  391.     char* cAddr = r->adr;
  392.     int cSize = (int) r->size;
  393.     if ( (opt->debug>0) && (opt->log!=NULL) ) {
  394.       fspc(opt->log,"info"); fprintf(opt->log,"engine: preprocess-html: %s%s"LF, urladr, urlfil);
  395.     }
  396.     if (hts_htmlcheck_preprocess(&cAddr, &cSize, urladr, urlfil) == 1) {
  397.       r->adr = cAddr;
  398.       r->size = cSize;
  399.     }
  400.   }
  401.   if (hts_htmlcheck(r->adr,(int)r->size,urladr,urlfil)) {
  402. #endif          
  403.     FILE* fp=NULL;      // fichier Θcrit localement 
  404.     char* adr=r->adr;    // pointeur (on parcourt)
  405.     char* lastsaved;    // adresse du dernier octet sauvΘ + 1
  406.     if ( (opt->debug>1) && (opt->log!=NULL) ) {
  407.       fspc(opt->log,"debug"); fprintf(opt->log,"scan file.."LF); test_flush;
  408.     }
  409.  
  410.  
  411.     // Indexing!
  412. #if HTS_MAKE_KEYWORD_INDEX
  413.     if (opt->kindex) {
  414.       if (index_keyword(r->adr,r->size,r->contenttype,savename,opt->path_html)) {
  415.         if ( (opt->debug>1) && (opt->log!=NULL) ) {
  416.           fspc(opt->log,"debug"); fprintf(opt->log,"indexing file..done"LF); test_flush;
  417.         }
  418.       } else {
  419.         if ( (opt->debug>1) && (opt->log!=NULL) ) {
  420.           fspc(opt->log,"debug"); fprintf(opt->log,"indexing file..error!"LF); test_flush;
  421.         }
  422.       }
  423.     }
  424. #endif
  425.  
  426.     // Now, parsing
  427.     if ((opt->getmode & 1) && (ptr>0)) {  // rΘcupΘrer les html sur disque       
  428.       // crΘer le fichier html local
  429.       HT_ADD_FOP;   // Θcrire peu α peu le fichier
  430.     }
  431.  
  432.     if (!error) {
  433.       int detect_title=0;  // dΘtection  du title
  434.       int back_add_stats = opt->state.back_add_stats;
  435.       //
  436.       char* in_media=NULL; // in other media type (real media and so..)
  437.       int intag=0;         // on est dans un tag
  438.       int incomment=0;     // dans un <!--
  439.       int inscript=0;      // dans un scipt pour applets javascript)
  440.       signed char inscript_state[10][257];
  441.       typedef enum { 
  442.         INSCRIPT_START=0,
  443.         INSCRIPT_ANTISLASH,
  444.         INSCRIPT_INQUOTE,
  445.         INSCRIPT_INQUOTE2,
  446.         INSCRIPT_SLASH,
  447.         INSCRIPT_SLASHSLASH,
  448.         INSCRIPT_COMMENT,
  449.         INSCRIPT_COMMENT2,
  450.         INSCRIPT_ANTISLASH_IN_QUOTE,
  451.         INSCRIPT_ANTISLASH_IN_QUOTE2,
  452.         INSCRIPT_DEFAULT=256
  453.       } INSCRIPT;
  454.       INSCRIPT inscript_state_pos=INSCRIPT_START;
  455.       char* inscript_name=NULL; // script tag name
  456.       int inscript_tag=0;  // on est dans un <body onLoad="... terminΘ par >
  457.       char inscript_tag_lastc='\0';
  458.       // terminaison (" ou ') du "<body onLoad=.."
  459.       int inscriptgen=0;     // on est dans un code gΘnΘrant, ex aprΦs obj.write("..
  460.       //int inscript_check_comments=0, inscript_in_comments=0;    // javascript comments
  461.       char scriptgen_q='\0'; // caractΦre faisant office de guillemet (' ou ")
  462.       int no_esc_utf=0;      // ne pas echapper chars > 127
  463.       int nofollow=0;        // ne pas scanner
  464.       //
  465.       int parseall_lastc='\0';     // dernier caractΦre parsΘ pour parseall
  466.       //int parseall_incomment=0;   // dans un /* */ (exemple: a = /* URL */ "img.gif";)
  467.       //
  468.       char* intag_start=adr;
  469.       char* intag_startattr=NULL;
  470.       int intag_start_valid=0;
  471.       int intag_ctype=0;
  472.       //
  473.       int   parent_relative=0;    // the parent is the base path (.js, .css..)
  474.       HT_ADD_START;    // dΘbuter
  475.  
  476.       /* Initialize script automate for comments, quotes.. */
  477.       memset(inscript_state, 0xff, sizeof(inscript_state));
  478.       inscript_state[INSCRIPT_START][INSCRIPT_DEFAULT]=INSCRIPT_START;     /* by default, stay in START */
  479.       inscript_state[INSCRIPT_START]['\\']=INSCRIPT_ANTISLASH;             /* #1: \ escapes the next character whatever it is */
  480.       inscript_state[INSCRIPT_ANTISLASH][INSCRIPT_DEFAULT]=INSCRIPT_START;
  481.       inscript_state[INSCRIPT_START]['\'']=INSCRIPT_INQUOTE;               /* #2: ' opens quote and only ' returns to 0 */
  482.       inscript_state[INSCRIPT_INQUOTE][INSCRIPT_DEFAULT]=INSCRIPT_INQUOTE;
  483.       inscript_state[INSCRIPT_INQUOTE]['\'']=INSCRIPT_START;
  484.       inscript_state[INSCRIPT_INQUOTE]['\\']=INSCRIPT_ANTISLASH_IN_QUOTE;
  485.       inscript_state[INSCRIPT_START]['\"']=INSCRIPT_INQUOTE2;              /* #3: " opens double-quote and only " returns to 0 */
  486.       inscript_state[INSCRIPT_INQUOTE2][INSCRIPT_DEFAULT]=INSCRIPT_INQUOTE2;
  487.       inscript_state[INSCRIPT_INQUOTE2]['\"']=INSCRIPT_START;
  488.       inscript_state[INSCRIPT_INQUOTE2]['\\']=INSCRIPT_ANTISLASH_IN_QUOTE2;
  489.       inscript_state[INSCRIPT_START]['/']=INSCRIPT_SLASH;                  /* #4: / state, default to #0 */
  490.       inscript_state[INSCRIPT_SLASH][INSCRIPT_DEFAULT]=INSCRIPT_START;
  491.       inscript_state[INSCRIPT_SLASH]['/']=INSCRIPT_SLASHSLASH;             /* #5: // with only LF to escape */
  492.       inscript_state[INSCRIPT_SLASHSLASH][INSCRIPT_DEFAULT]=INSCRIPT_SLASHSLASH;
  493.       inscript_state[INSCRIPT_SLASHSLASH]['\n']=INSCRIPT_START;
  494.       inscript_state[INSCRIPT_SLASH]['*']=INSCRIPT_COMMENT;                /* #6: / * with only * / to escape */
  495.       inscript_state[INSCRIPT_COMMENT][INSCRIPT_DEFAULT]=INSCRIPT_COMMENT;
  496.       inscript_state[INSCRIPT_COMMENT]['*']=INSCRIPT_COMMENT2;             /* #7: closing comments */
  497.       inscript_state[INSCRIPT_COMMENT2][INSCRIPT_DEFAULT]=INSCRIPT_COMMENT;
  498.       inscript_state[INSCRIPT_COMMENT2]['/']=INSCRIPT_START;
  499.       inscript_state[INSCRIPT_COMMENT2]['*']=INSCRIPT_COMMENT2;
  500.       inscript_state[INSCRIPT_ANTISLASH_IN_QUOTE][INSCRIPT_DEFAULT]=INSCRIPT_INQUOTE;    /* #8: escape in "" */
  501.       inscript_state[INSCRIPT_ANTISLASH_IN_QUOTE2][INSCRIPT_DEFAULT]=INSCRIPT_INQUOTE2;  /* #9: escape in '' */
  502.  
  503.  
  504.       /* statistics */
  505.       if ((opt->getmode & 1) && (ptr>0)) { 
  506.         /*
  507.         HTS_STAT.stat_files++;
  508.         HTS_STAT.stat_bytes+=r->size;
  509.         */
  510.       }
  511.  
  512.       /* Primary list or URLs */
  513.       if (ptr == 0) {
  514.         intag=1;
  515.         intag_start_valid=0;
  516.       }
  517.       /* Check is the file is a .js file */
  518.       else if (
  519.         (compare_mime(r->contenttype, str->url_file, "application/x-javascript")!=0)
  520.         || (compare_mime(r->contenttype, str->url_file, "text/css")!=0)
  521.         ) {      /* JavaScript js file */
  522.           inscript=1;
  523.           if (opt->parsedebug) { HT_ADD("<@@ inscript @@>"); }
  524.           inscript_name="script";
  525.           intag=1;     // because aprΦs <script> on y est .. - pas utile
  526.           intag_start_valid=0;    // OUI car nous sommes dans du code, plus dans du "vrai" tag
  527.           if ((opt->debug>1) && (opt->log!=NULL)) {
  528.             fspc(opt->log,"debug"); fprintf(opt->log,"note: this file is a javascript file"LF); test_flush;
  529.           }
  530.           // for javascript only
  531.           if (compare_mime(r->contenttype, str->url_file, "application/x-javascript") != 0) {
  532.             // all links must be checked against parent, not this link
  533.             if (liens[ptr]->precedent != 0) {
  534.               parent_relative=1;
  535.             }
  536.           }
  537.         }
  538.         /* Or a real audio */
  539.       else if (compare_mime(r->contenttype, str->url_file, "audio/x-pn-realaudio")!=0) {      /* realaudio link file */
  540.         inscript=intag=0;
  541.         inscript_name="media";
  542.         intag_start_valid=0;
  543.         in_media="LNK";       // real media! -> links
  544.       } 
  545.       /* Or a m3u playlist */
  546.       else if (compare_mime(r->contenttype, str->url_file, "audio/x-mpegurl")!=0) {      /* mp3 link file */
  547.         inscript=intag=0;
  548.         inscript_name="media";
  549.         intag_start_valid=0;
  550.         in_media="LNK";       // m3u! -> links
  551.       } 
  552.       else if (compare_mime(r->contenttype, str->url_file, "application/x-authorware-map")!=0) {      /* macromedia aam file */
  553.         inscript=intag=0;
  554.         inscript_name="media";
  555.         intag_start_valid=0;
  556.         in_media="AAM";       // aam
  557.       } 
  558.  
  559.       // Detect UTF8 format
  560.       if (is_unicode_utf8((unsigned char*) r->adr, (unsigned int) r->size) == 1) {
  561.         no_esc_utf=1;
  562.       } else {
  563.         no_esc_utf=0;
  564.       }
  565.       // Hack to prevent any problems with ram files of other files
  566.       * ( r->adr + r->size ) = '\0';
  567.  
  568.  
  569.       // ------------------------------------------------------------
  570.       // analyser ce qu'il y a en mΘmoire (fichier html)
  571.       // on scanne les balises
  572.       // ------------------------------------------------------------
  573. #if HTS_ANALYSTE
  574.       _hts_in_html_done=0;     // 0% scannΘs
  575.       _hts_cancel=0;           // pas de cancel
  576.       _hts_in_html_parsing=1;  // flag pour indiquer un parsing
  577. #endif
  578.       base[0]='\0';    // effacer base-href
  579.       lastsaved=adr;
  580.       do {
  581.         int p=0;
  582.         int valid_p=0;      // force to take p even if == 0
  583.         int ending_p='\0';  // ending quote?
  584.         int archivetag_p=0;  // avoid multiple-archives with commas
  585.         int  unquoted_script=0;
  586.         INSCRIPT inscript_state_pos_prev=inscript_state_pos;
  587.         error=0;
  588.  
  589.         /* Hack to avoid NULL char problems with C syntax */
  590.         /* Yes, some bogus HTML pages can embed null chars
  591.         and therefore can not be properly handled if this hack is not done
  592.         */
  593.         if ( ! (*adr) ) {
  594.           if ( ((int) (adr - r->adr)) < r->size)
  595.             *adr=' ';
  596.         }
  597.  
  598.  
  599.  
  600.         /*
  601.         index.html built here
  602.         */
  603.         // Construction index.html (sommaire)
  604.         // Avant de tester les a href,
  605.         // Ici on teste si l'on doit construire l'index vers le(s) site(s) miroir(s)
  606.         if (!makeindex_done) {  // autoriation d'Θcrire un index
  607.           if (!detect_title) {
  608.             if (opt->depth == liens[ptr]->depth) {    // on note toujours les premiers liens
  609.               if (!in_media) {
  610.                 if (opt->makeindex && (ptr>0)) {
  611.                   if (opt->getmode & 1) {  // autorisation d'Θcrire
  612.                     p=strfield(adr,"title");  
  613.                     if (p) {
  614.                       if (*(adr-1)=='/') p=0;    // /title
  615.                     } else {
  616.                       if (strfield(adr,"/html"))
  617.                         p=-1;                    // noter, mais sans titre
  618.                       else if (strfield(adr,"body"))
  619.                         p=-1;                    // noter, mais sans titre
  620.                       else if ( ((int) (adr - r->adr) ) >= (r->size-1) )
  621.                         p=-1;                    // noter, mais sans titre
  622.                       else if ( (int) (adr - r->adr) >= r->size - 2)   // we got to hurry
  623.                         p=-1; // xxc xxc xxc
  624.                     }
  625.                   } else
  626.                     p=0;
  627.  
  628.                   if (p) {    // ok center                            
  629.                     if (makeindex_fp==NULL) {
  630.                       verif_backblue(opt,opt->path_html);    // gΘnΘrer gif
  631.                       makeindex_fp=filecreate(fconcat(opt->path_html,"index.html"));
  632.                       if (makeindex_fp!=NULL) {
  633.  
  634.                         // Header
  635.                         fprintf(makeindex_fp,template_header,
  636.                           "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"
  637.                           );
  638.  
  639.                       } else makeindex_done=-1;    // fait, erreur
  640.                     }
  641.  
  642.                     if (makeindex_fp!=NULL) {
  643.                       char BIGSTK tempo[HTS_URLMAXSIZE*2];
  644.                       char BIGSTK s[HTS_URLMAXSIZE*2];
  645.                       char* a=NULL;
  646.                       char* b=NULL;
  647.                       s[0]='\0';
  648.                       if (p>0) {
  649.                         a=strchr(adr,'>');
  650.                         if (a!=NULL) {
  651.                           a++;
  652.                           while(is_space(*a)) a++;    // sauter espaces & co
  653.                           b=strchr(a,'<');   // prochain tag
  654.                         }
  655.                       }
  656.                       if (lienrelatif(tempo,liens[ptr]->sav,concat(opt->path_html,"index.html"))==0) {
  657.                         detect_title=1;      // ok dΘtectΘ pour cette page!
  658.                         makeindex_links++;   // un de plus
  659.                         strcpybuff(makeindex_firstlink,tempo);
  660.                         //
  661.  
  662.                         /* Hack */
  663.                         if (opt->mimehtml) {
  664.                           strcpybuff(makeindex_firstlink, "cid:primary/primary");
  665.                         }
  666.  
  667.                         if ((b==a) || (a==NULL) || (b==NULL)) {    // pas de titre
  668.                           strcpybuff(s,tempo);
  669.                         } else if ((b-a)<256) {
  670.                           b--;
  671.                           while(is_space(*b)) b--;
  672.                           strncpy(s,a,b-a+1);
  673.                           *(s+(b-a)+1)='\0';
  674.                         }
  675.  
  676.                         // Body
  677.                         fprintf(makeindex_fp,template_body,
  678.                           tempo,
  679.                           s
  680.                           );
  681.  
  682.                       }
  683.                     }
  684.                   }
  685.                 }
  686.               }
  687.  
  688.             } else if (liens[ptr]->depth<opt->depth) {   // on a sautΘ level1+1 et level1
  689.               HT_INDEX_END;
  690.             }
  691.           } // if (opt->makeindex)
  692.         }
  693.         // FIN Construction index.html (sommaire)
  694.         /*
  695.         end -- index.html built here
  696.         */
  697.  
  698.  
  699.  
  700.         /* Parse */
  701.         if (
  702.           (*adr=='<')    /* No starting tag */
  703.           && (!inscript)    /* Not in (java)script */
  704.           && (!incomment)   /* Not in comment (<!--) */
  705.           && (!in_media)    /* Not in media */
  706.           ) { 
  707.             intag=1;
  708.             intag_ctype=0;
  709.             //parseall_incomment=0;
  710.             //inquote=0;  // effacer quote
  711.             intag_start=adr; intag_start_valid=1;
  712.             codebase[0]='\0';    // effacer Θventuel codebase
  713.  
  714.             if (opt->getmode & 1) {  // sauver html
  715.               p=strfield(adr,"</html");
  716.               if (p==0) p=strfield(adr,"<head>");
  717.               // if (p==0) p=strfield(adr,"<doctype");
  718.               if (p) {
  719.                 char* eol="\n";
  720.                 if (strchr(r->adr,'\r'))
  721.                   eol="\r\n";
  722.                 if (strnotempty(opt->footer)) {
  723.                   char BIGSTK tempo[1024+HTS_URLMAXSIZE*2];
  724.                   char gmttime[256];
  725.                   tempo[0]='\0';
  726.                   time_gmt_rfc822(gmttime);
  727.                   strcatbuff(tempo,eol);
  728.                   sprintf(tempo+strlen(tempo),opt->footer,jump_identification(urladr),urlfil,gmttime,HTTRACK_VERSIONID,"","","","","","","");
  729.                   strcatbuff(tempo,eol);
  730.                   //fwrite(tempo,1,strlen(tempo),fp);
  731.                   HT_ADD(tempo);
  732.                   if (r->charset[0]) {
  733.                     HT_ADD("<!-- Added by HTTrack --><meta http-equiv=\"content-type\" content=\"text/html;charset=");
  734.                     HT_ADD(r->charset);
  735.                     HT_ADD("\"><!-- /Added by HTTrack -->");
  736.                     HT_ADD(eol);
  737.                   }
  738.                 }
  739.               }
  740.             }        
  741.  
  742.             // éliminer les <!-- (commentaires) : intag dévalidé
  743.             if (*(adr+1)=='!')
  744.               if (*(adr+2)=='-')
  745.                 if (*(adr+3)=='-') {
  746.                   intag=0;
  747.                   incomment=1;
  748.                   intag_start_valid=0;
  749.                 }
  750.  
  751.           }
  752.         else if (
  753.           (*adr=='>')                        /* ending tag */
  754.           && ( (!inscript && !in_media) || (inscript_tag) )  /* and in tag (or in script) */
  755.           ) {
  756.             if (inscript_tag) {
  757.               inscript_tag=inscript=0;
  758.               intag=0;
  759.               incomment=0;
  760.               intag_start_valid=0;
  761.               if (opt->parsedebug) { HT_ADD("<@@ /inscript @@>"); }
  762.             } else if (!incomment) {
  763.               intag=0; //inquote=0;
  764.  
  765.               // entrΘe dans du javascript?
  766.               // on parse ICI car il se peut qu'on ait eu a parser les src=.. dedans
  767.               //if (!inscript) {  // sinon on est dans un obj.write("..
  768.               if ((intag_start_valid) && 
  769.                 (
  770.                 check_tag(intag_start,"script")
  771.                 ||
  772.                 check_tag(intag_start,"style")
  773.                 )
  774.                 ) {
  775.                   char* a=intag_start;    // <
  776.                   // ** while(is_realspace(*(--a)));
  777.                   if (*a=='<') {  // s√r que c'est un tag?
  778.                     if (check_tag(intag_start,"script"))
  779.                       inscript_name="script";
  780.                     else
  781.                       inscript_name="style";
  782.                     inscript=1;
  783.                     inscript_state_pos=INSCRIPT_START;
  784.                     intag=1;     // because aprΦs <script> on y est .. - pas utile
  785.                     intag_start_valid=0;    // OUI car nous sommes dans du code, plus dans du "vrai" tag
  786.                     if (opt->parsedebug) { HT_ADD("<@@ inscript @@>"); }
  787.                   }
  788.                 }
  789.             } else {                               /* end of comment? */
  790.               // vΘrifier fermeture correcte
  791.               if ( (*(adr-1)=='-') && (*(adr-2)=='-') ) {
  792.                 intag=0;
  793.                 incomment=0;
  794.                 intag_start_valid=0;
  795.               }
  796. #if GT_ENDS_COMMENT
  797.               /* wrong comment ending */
  798.               else {
  799.                 /* check if correct ending does not exists
  800.                 <!-- foo > example <!-- bar > is sometimes accepted by browsers
  801.                 when no --> is used somewhere else.. darn those browsers are dirty
  802.                 */
  803.                 if (!strstr(adr,"-->")) {
  804.                   intag=0;
  805.                   incomment=0;
  806.                   intag_start_valid=0;
  807.                 }
  808.               }
  809. #endif
  810.             }
  811.             //}
  812.           }
  813.           //else if (*adr==34) {
  814.           //  inquote=(inquote?0:1);
  815.           //}
  816.         else if (intag || inscript || in_media) {    // nous sommes dans un tag/commentaire, tester si on recoit un tag
  817.           int p_type=0;
  818.           int p_nocatch=0;
  819.           int p_searchMETAURL=0;  // chercher ..URL=<url>
  820.           int add_class=0;        // ajouter .class
  821.           int add_class_dots_to_patch=0;   // number of '.' in code="x.y.z<realname>"
  822.           char* p_flush=NULL;
  823.  
  824.  
  825.           // ------------------------------------------------------------
  826.           // parsing ΘvolΘ
  827.           // ------------------------------------------------------------
  828.           if (((isalpha((unsigned char)*adr)) || (*adr=='/') || (inscript) || (in_media) || (inscriptgen))) {  // sinon pas la peine de tester..
  829.  
  830.  
  831.             /* caractΦre de terminaison pour "miniparsing" javascript=.. ? 
  832.             (ex: <a href="javascript:()" action="foo"> ) */
  833.             if (inscript_tag) {
  834.               if (inscript_tag_lastc) {
  835.                 if (*adr == inscript_tag_lastc) {
  836.                   /* sortir */
  837.                   inscript_tag=inscript=0;
  838.                   incomment=0;
  839.                   if (opt->parsedebug) { HT_ADD("<@@ /inscript @@>"); }
  840.                 }
  841.               }
  842.             }
  843.  
  844.             /* automate */
  845.             AUTOMATE_LOOKUP_CURRENT_ADR();
  846.  
  847.  
  848.             // Note:
  849.             // Certaines pages ne respectent pas le html
  850.             // notamment les guillemets ne sont pas fixés
  851.             // Nous sommes dans un tag, donc on peut faire un test plus
  852.             // large pour pouvoir prendre en compte ces particularités
  853.  
  854.             // à vérifier: ACTION, CODEBASE, VRML
  855.  
  856.             if (in_media) {
  857.               if (strcmp(in_media,"LNK")==0) { // real media
  858.                 p=0;
  859.                 valid_p=1;
  860.               }
  861.               else if (strcmp(in_media,"AAM")==0) { // AAM
  862.                 if (is_space((unsigned char)adr[0]) && ! is_space((unsigned char)adr[1])) {
  863.                   char* a = adr + 1;
  864.                   int n = 0;
  865.                   int ok = 0;
  866.                   int dot = 0;
  867.                   while(n < HTS_URLMAXSIZE/2 && a[n] != '\0' &&
  868.                     ( ! is_space((unsigned char)a[n]) || ! ( ok = 1) )
  869.                     ) {
  870.                       if (a[n] == '.') {
  871.                         dot = n;
  872.                       }
  873.                       n++;
  874.                     }
  875.                     if (ok && dot > 0) {
  876.                       char BIGSTK tmp[HTS_URLMAXSIZE/2 + 2];
  877.                       tmp[0] = '\0';
  878.                       strncat(tmp, a + dot + 1, n - dot - 1);
  879.                       if (is_knowntype(tmp) || ishtml_ext(tmp) != -1) {
  880.                         adr++;
  881.                         p = 0;
  882.                         valid_p = 1;
  883.                         unquoted_script = 1;
  884.                       }
  885.                     }
  886.                 }
  887.               }
  888.             } else if (ptr>0) {        /* pas premiΦre page 0 (primary) */
  889.               p=0;  // saut pour le nom de fichier: adresse nom fichier=adr+p
  890.  
  891.               // ------------------------------
  892.               // détection d'écriture JavaScript.
  893.               // osons les obj.write et les obj.href=.. ! osons!
  894.               // note: inscript==1 donc on sautera après les \"
  895.               if (inscript) {
  896.                 if (inscriptgen) {          // on est dΘja dans un objet gΘnΘrant..
  897.                   if (*adr==scriptgen_q) {  // fermeture des " ou '
  898.                     if (*(adr-1)!='\\') {   // non
  899.                       inscriptgen=0;        // ok parsing terminΘ
  900.                     }
  901.                   }
  902.                 } else {
  903.                   char* a=NULL;
  904.                   char check_this_fking_line=0;  // parsing code javascript..
  905.                   char must_be_terminated=0;     // caractΦre obligatoire de terminaison!
  906.                   int token_size;
  907.                   if (!(token_size=strfield(adr,".writeln"))) // dΘtection ...objet.write[ln]("code html")...
  908.                     token_size=strfield(adr,".write");
  909.                   if (token_size) {
  910.                     a=adr+token_size;
  911.                     while(is_realspace(*a)) a++; // sauter espaces
  912.                     if (*a=='(') {  // dΘbut parenthΦse
  913.                       check_this_fking_line=2;  // α parser!
  914.                       must_be_terminated=')';
  915.                       a++;  // sauter (
  916.                     }
  917.                   }
  918.                   // euhh ??? ???
  919.                   /* else if (strfield(adr,".href")) {  // dΘtection ...objet.href="...
  920.                   a=adr+5;
  921.                   while(is_realspace(*a)) a++; // sauter espaces
  922.                   if (*a=='=') {  // ohh un Θgal
  923.                   check_this_fking_line=1;  // α noter!
  924.                   must_be_terminated=';';   // et si t'as oubliΘ le ; tu sais pas coder
  925.                   a++;   // sauter =
  926.                   }
  927.  
  928.                   }*/
  929.  
  930.                   // on a un truc du genre instruction"code gΘnΘrΘ" dont on parse le code
  931.                   if (check_this_fking_line) {
  932.                     while(is_realspace(*a)) a++;
  933.                     if ((*a=='\'') || (*a=='"')) {  // dΘpart de '' ou ""
  934.                       char *b;
  935.                       scriptgen_q=*a;    // quote
  936.                       b=a+1;      // dΘpart de la chaεne
  937.                       // vΘrifier forme ("code") et pas ("code"+var), ingΘrable
  938.                       do {
  939.                         if (*a==scriptgen_q && *(a-1)!='\\')  // quote non slash
  940.                           break;            // sortie
  941.                         else if (*a==10 && *(a-1) != '\\'  /* LF and no continue (\) character */
  942.                           && ( *(a-1) != '\r' || *(a-2) != '\\' ) )  /* and not CRLF and no .. */
  943.                           break;
  944.                         else 
  945.                           a++;  // caractΦre suivant
  946.                       } while((a-b) < HTS_URLMAXSIZE / 2);
  947.                       if (*a==scriptgen_q) {  // fin du quote
  948.                         a++;
  949.                         while(is_realspace(*a)) a++;
  950.                         if (*a==must_be_terminated) {  // parenthΦse fermante: ("..")
  951.  
  952.                           // bon, on doit parser une ligne javascript
  953.                           // 1) si check.. ==1 alors c'est un nom de fichier direct, donc
  954.                           // on fixe p sur le saut nécessaire pour atteindre le nom du fichier
  955.                           // et le moteur se débrouillera ensuite tout seul comme un grand
  956.                           // 2) si check==2 c'est un peu plus tordu car là on génère du
  957.                           // code html au sein de code javascript au sein de code html
  958.                           // dans ce cas on doit fixer un flag à un puis ensuite dans la boucle
  959.                           // on devra parser les instructions standard comme <a href etc
  960.                           // NOTE: le code javascript autogénéré n'est pas pris en compte!!
  961.                           // (et ne marche pas dans 50% des cas de toute façon!)
  962.                           if (check_this_fking_line==1) {
  963.                             p=(int) (b - adr);    // calculer saut!
  964.                           } else {
  965.                             inscriptgen=1;        // SCRIPTGEN actif
  966.                             adr=b;                // jump
  967.                           }
  968.  
  969.                           if ((opt->debug>1) && (opt->log!=NULL)) {
  970.                             char str[512];
  971.                             str[0]='\0';
  972.                             strncatbuff(str,b,minimum((int) (a - b + 1), 32));
  973.                             fspc(opt->log,"debug"); fprintf(opt->log,"active code (%s) detected in javascript: %s"LF,(check_this_fking_line==2)?"parse":"pickup",str); test_flush;
  974.                           }
  975.                         }
  976.  
  977.                       }
  978.  
  979.                     }
  980.  
  981.  
  982.                   }
  983.                 }
  984.               }
  985.               // fin detection code gΘnΘrant javascript vers html
  986.               // ------------------------------
  987.  
  988.  
  989.               // analyse proprement dite, A HREF=.. etc..
  990.               if (!p) {
  991.                 // si dans un tag, et pas dans un script - sauf si on analyse un obj.write("..
  992.                 if ((intag && (!inscript)) || inscriptgen) {
  993.                   if ( (*(adr-1)=='<') || (is_space(*(adr-1))) ) {   // <tag < tag etc
  994.                     // <A HREF=.. pour les liens HTML
  995.                     p=rech_tageq(adr,"href");
  996.                     if (p) {    // href.. tester si c'est une bas href!
  997.                       if ((intag_start_valid) && check_tag(intag_start,"base")) {  // oui!
  998.                         // ** note: base href et codebase ne font pas bon mΘnage..
  999.                         p_type=2;    // c'est un chemin
  1000.                       }
  1001.                     }
  1002.  
  1003.                     /* Tags supplΘmentaires α vΘrifier (<img src=..> etc) */
  1004.                     if (p==0) {
  1005.                       int i=0;
  1006.                       while( (p==0) && (strnotempty(hts_detect[i])) ) {
  1007.                         p=rech_tageq(adr,hts_detect[i]);
  1008.                         if (p) {
  1009.                           /* This is a temporary hack to avoid archive=foo.jar,bar.jar .. */
  1010.                           if (strcmp(hts_detect[i], "archive") == 0) {
  1011.                             archivetag_p = 1;
  1012.                           }
  1013.                         }
  1014.                         i++;
  1015.                       }
  1016.                     }
  1017.  
  1018.                     /* Tags supplΘmentaires en dΘbut α vΘrifier (<object .. hotspot1=..> etc) */
  1019.                     if (p==0) {
  1020.                       int i=0;
  1021.                       while( (p==0) && (strnotempty(hts_detectbeg[i])) ) {
  1022.                         p=rech_tageqbegdigits(adr,hts_detectbeg[i]);
  1023.                         i++;
  1024.                       }
  1025.                     }
  1026.  
  1027.                     /* Tags supplΘmentaires α vΘrifier : URL=.. */
  1028.                     if (p==0) {
  1029.                       int i=0;
  1030.                       while( (p==0) && (strnotempty(hts_detectURL[i])) ) {
  1031.                         p=rech_tageq(adr,hts_detectURL[i]);
  1032.                         i++;
  1033.                       }
  1034.                       if (p) {
  1035.                         if (intag_ctype == 1) {
  1036.                           p = 0;
  1037. #if 0
  1038.                           //if ((pos=rech_tageq(adr, "content"))) {
  1039.                           char temp[256];
  1040.                           char* token = NULL;
  1041.                           int len = rech_endtoken(adr + pos, &token);
  1042.                           if (len > 0 && len < sizeof(temp) - 2) {
  1043.                             char* chpos;
  1044.                             temp[0] = '\0';
  1045.                             strncat(temp, token, len);
  1046.                             if ((chpos = strstr(temp, "charset"))
  1047.                               &&
  1048.                               (chpos = strchr(chpos, '='))
  1049.                               ) {
  1050.                                 chpos++;
  1051.                                 while(is_space(*chpos)) chpod++;
  1052.                                 chpos
  1053.                               }
  1054.                           }
  1055. #endif
  1056.                         }
  1057.                         // <META HTTP-EQUIV="Refresh" CONTENT="3;URL=http://www.example.com">
  1058.                         else if (intag_ctype == 2) {
  1059.                           p_searchMETAURL=1;
  1060.                         } else {
  1061.                           p = 0;            /* cancel */
  1062.                         }
  1063.                       }
  1064.  
  1065.  
  1066.                     }
  1067.  
  1068.                     /* Tags supplΘmentaires α vΘrifier, mais α ne pas capturer */
  1069.                     if (p==0) {
  1070.                       int i=0;
  1071.                       while( (p==0) && (strnotempty(hts_detectandleave[i])) ) {
  1072.                         p=rech_tageq(adr,hts_detectandleave[i]);
  1073.                         i++;
  1074.                       }
  1075.                       if (p)
  1076.                         p_nocatch=1;      /* ne pas rechercher */
  1077.                     }
  1078.  
  1079.                     /* EvΘnements */
  1080.                     if (p==0 && 
  1081.                       ! inscript          /* we don't want events inside document.write */
  1082.                       ) {
  1083.                         int i=0;
  1084.                         /* dΘtection onLoad etc */
  1085.                         while( (p==0) && (strnotempty(hts_detect_js[i])) ) {
  1086.                           p=rech_tageq(adr,hts_detect_js[i]);
  1087.                           i++;
  1088.                         }
  1089.                         /* non dΘtectΘ - dΘtecter Θgalement les onXxxxx= */
  1090.                         if (p==0) {
  1091.                           if ( (*adr=='o') && (*(adr+1)=='n') && isUpperLetter(*(adr+2)) ) {
  1092.                             p=0;
  1093.                             while(isalpha((unsigned char)adr[p]) && (p<64) ) p++;
  1094.                             if (p<64) {
  1095.                               while(is_space(adr[p])) p++;
  1096.                               if (adr[p]=='=')
  1097.                                 p++;
  1098.                               else p=0;
  1099.                             } else p=0;
  1100.                           }
  1101.                         }
  1102.                         /* OK, ΘvΘnement repΘrΘ */
  1103.                         if (p) {
  1104.                           inscript_tag_lastc=*(adr+p);     /* α attendre α la fin */
  1105.                           adr+=p+1;   /* saut */
  1106.                           /*
  1107.                           On est dΘsormais dans du code javascript
  1108.                           */
  1109.                           inscript_name="";
  1110.                           inscript=inscript_tag=1;
  1111.                           inscript_state_pos=INSCRIPT_START;
  1112.                           if (opt->parsedebug) { HT_ADD("<@@ inscript @@>"); }
  1113.                         }
  1114.                         p=0;        /* quoi qu'il arrive, ne rien dΘmarrer ici */
  1115.                       }
  1116.  
  1117.                       // <APPLET CODE=.. pour les applet java.. [CODEBASE (chemin..) α faire]
  1118.                       if (p==0) {
  1119.                         p=rech_tageq(adr,"code");
  1120.                         if (p) {
  1121.                           if ((intag_start_valid) && check_tag(intag_start,"applet")) {  // dans un <applet !
  1122.                             p_type=-1;  // juste le nom de fichier+dossier, Θcire avant codebase 
  1123.                             add_class=1;   // ajouter .class au besoin                         
  1124.  
  1125.                             // vΘrifier qu'il n'y a pas de codebase APRES
  1126.                             // sinon on swappe les deux.
  1127.                             // pas trΦs propre mais c'est ce qu'il y a de plus simple α faire!!
  1128.  
  1129.                             {
  1130.                               char *a;
  1131.                               a=adr;
  1132.                               while((*a) && (*a!='>') && (!rech_tageq(a,"codebase"))) a++;
  1133.                               if (rech_tageq(a,"codebase")) {  // banzai! codebase=
  1134.                                 char* b;
  1135.                                 b=strchr(a,'>');
  1136.                                 if (b) {
  1137.                                   if (((int) (b - adr)) < 1000) {    // au total < 1Ko
  1138.                                     char BIGSTK tempo[HTS_URLMAXSIZE*2];
  1139.                                     tempo[0]='\0';
  1140.                                     strncatbuff(tempo,a,(int) (b - a) );
  1141.                                     strcatbuff( tempo," ");
  1142.                                     strncatbuff(tempo,adr,(int) (a - adr - 1));
  1143.                                     // Θventuellement remplire par des espaces pour avoir juste la taille
  1144.                                     while((int) strlen(tempo)<((int) (b - adr)))
  1145.                                       strcatbuff(tempo," ");
  1146.                                     // pas d'erreur?
  1147.                                     if ((int) strlen(tempo) == ((int) (b - adr) )) {
  1148.                                       strncpy(adr,tempo,strlen(tempo));   // PAS d'octet nul α la fin!
  1149.                                       p=0;    // DEVALIDER!!
  1150.                                       p_type=0;
  1151.                                       add_class=0;
  1152.                                     }
  1153.                                   }
  1154.                                 }
  1155.                               }
  1156.                             }
  1157.  
  1158.                           }
  1159.                         }
  1160.                       }
  1161.  
  1162.                       // liens α patcher mais pas α charger (ex: codebase)
  1163.                       if (p==0) {  // note: si non chargΘ (ex: ignorer .class) patchΘ tout de mΩme
  1164.                         p=rech_tageq(adr,"codebase");
  1165.                         if (p) {
  1166.                           if ((intag_start_valid) && check_tag(intag_start,"applet")) {  // dans un <applet !
  1167.                             p_type=-2;
  1168.                           } else p=-1;   // ne plus chercher
  1169.                         }
  1170.                       }
  1171.  
  1172.  
  1173.                       // Meta tags pour robots
  1174.                       if (p==0) {
  1175.                         if (opt->robots) {
  1176.                           if ((intag_start_valid) && check_tag(intag_start,"meta")) {
  1177.                             if (rech_tageq(adr,"name")) {    // name=robots.txt
  1178.                               char tempo[1100];
  1179.                               char* a;
  1180.                               tempo[0]='\0';
  1181.                               a=strchr(adr,'>');
  1182. #if DEBUG_ROBOTS
  1183.                               printf("robots.txt meta tag detected\n");
  1184. #endif
  1185.                               if (a) {
  1186.                                 if (((int) (a - adr)) < 999 ) {
  1187.                                   strncatbuff(tempo,adr,(int) (a - adr));
  1188.                                   if (strstrcase(tempo,"content")) {
  1189.                                     if (strstrcase(tempo,"robots")) {
  1190.                                       if (strstrcase(tempo,"nofollow")) {
  1191. #if DEBUG_ROBOTS
  1192.                                         printf("robots.txt meta tag: nofollow in %s%s\n",urladr,urlfil);
  1193. #endif
  1194.                                         nofollow=1;       // NE PLUS suivre liens dans cette page
  1195.                                         if (opt->errlog) {
  1196.                                           fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s%s not scanned (follow robots meta tag)"LF,urladr,urlfil);
  1197.                                           test_flush;
  1198.                                         }
  1199.                                       }
  1200.                                     }
  1201.                                   }
  1202.                                 }
  1203.                               }
  1204.                             }
  1205.                           }
  1206.                         }
  1207.                       }
  1208.  
  1209.                       // charset meta tags
  1210.                       if (p==0) {
  1211.                         if ((intag_start_valid) && check_tag(intag_start,"meta")) {
  1212.                           int pos;
  1213.                           // <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
  1214.                           if ((pos=rech_tageq(adr, "http-equiv"))) {
  1215.                             const char* token = NULL;
  1216.                             int len = rech_endtoken(adr + pos, &token);
  1217.                             if (len > 0) {
  1218.                               if (strfield(token, "content-type")) {
  1219.                                 intag_ctype=1;
  1220.                               }
  1221.                               else if (strfield(token, "refresh")) {
  1222.                                 intag_ctype=2;
  1223.                               }
  1224.                             }
  1225.                           }
  1226.                         }                    
  1227.                       }
  1228.  
  1229.                       // entrΘe dans une applet javascript
  1230.                       /*if (!inscript) {  // sinon on est dans un obj.write("..
  1231.                       if (p==0)
  1232.                       if (rech_sampletag(adr,"script"))
  1233.                       if (check_tag(intag_start,"script")) {
  1234.                       inscript=1;
  1235.                       }
  1236.                       }*/
  1237.  
  1238.                       // Ici on procède à une analyse du code javascript pour tenter de récupérer
  1239.                       // certains fichiers évidents.
  1240.                       // C'est devenu obligatoire vu le nombre de pages qui intègrent
  1241.                       // des images réactives par exemple
  1242.                   }
  1243.                 } else if (inscript) {
  1244.  
  1245. #if 0
  1246.                   /* Check // javascript comments */
  1247.                   if (*adr == 10 || *adr == 13) {
  1248.                     inscript_check_comments = 1;
  1249.                     inscript_in_comments = 0;
  1250.                   }
  1251.                   else if (inscript_check_comments) {
  1252.                     if (!is_realspace(*adr)) {
  1253.                       inscript_check_comments = 0;
  1254.                       if (adr[0] == '/' && adr[1] == '/') {
  1255.                         inscript_in_comments = 1;
  1256.                       }
  1257.                     }
  1258.                   }
  1259. #endif
  1260.  
  1261.                   /* Parse */
  1262.                   assertf(inscript_name != NULL);
  1263.                   if (
  1264.                     *adr == '/' &&
  1265.                     (
  1266.                     (strfield(adr,"/script") && strfield(inscript_name, "script"))
  1267.                     ||
  1268.                     (strfield(adr,"/style")  && strfield(inscript_name, "style"))
  1269.                     )
  1270.                     ) {
  1271.                       char* a=adr;
  1272.                       //while(is_realspace(*(--a)));
  1273.                       while( is_realspace(*a) ) a--;
  1274.                       a--;
  1275.                       if (*a=='<') {  // s√r que c'est un tag?
  1276.                         inscript=0;
  1277.                         if (opt->parsedebug) { HT_ADD("<@@ /inscript @@>"); }
  1278.                       }
  1279.                     } else if (inscript_state_pos == INSCRIPT_START /*!inscript_in_comments*/) {
  1280.                       /*
  1281.                       Script Analyzing - different types supported:
  1282.                       foo="url"
  1283.                       foo("url") or foo(url)
  1284.                       foo "url"
  1285.                       */
  1286.                       int nc;
  1287.                       char  expected     = '=';          // caractΦre attendu aprΦs
  1288.                       char* expected_end = ";";
  1289.                       int can_avoid_quotes=0;
  1290.                       char quotes_replacement='\0';
  1291.                       int ensure_not_mime=0;
  1292.                       if (inscript_tag)
  1293.                         expected_end=";\"\'";            // voir a href="javascript:doc.location='foo'"
  1294.                       nc = strfield(adr,".src");  // nom.src="image";
  1295.                       if (!nc) nc = strfield(adr,".location");  // document.location="doc"
  1296.                       if (!nc) nc = strfield(adr,":location");  // javascript:location="doc"
  1297.                       if (!nc) nc = strfield(adr,".href");  // document.location="doc"
  1298.                       if (!nc) if ( (nc = strfield(adr,".open")) ) { // window.open("doc",..
  1299.                         expected='(';    // parenthΦse
  1300.                         expected_end="),";  // fin: virgule ou parenthΦse
  1301.                         ensure_not_mime=1;  //* ensure the url is not a mime type */
  1302.                       }
  1303.                       if (!nc) if ( (nc = strfield(adr,".replace")) ) { // window.replace("url")
  1304.                         expected='(';    // parenthΦse
  1305.                         expected_end=")";  // fin: parenthΦse
  1306.                       }
  1307.                       if (!nc) if ( (nc = strfield(adr,".link")) ) { // window.link("url")
  1308.                         expected='(';    // parenthΦse
  1309.                         expected_end=")";  // fin: parenthΦse
  1310.                       }
  1311.                       if (!nc) if ( (nc = strfield(adr,"url")) && (!isalnum(*(adr - 1))) 
  1312.                         && *(adr - 1) != '_'
  1313.                         ) { // url(url)
  1314.                           expected='(';    // parenthΦse
  1315.                           expected_end=")";  // fin: parenthΦse
  1316.                           can_avoid_quotes=1;
  1317.                           quotes_replacement=')';
  1318.                         }
  1319.                         if (!nc) if ( (nc = strfield(adr,"import")) ) { // import "url"
  1320.                           if (is_space(*(adr+nc))) {
  1321.                             expected=0;    // no char expected
  1322.                           } else
  1323.                             nc=0;
  1324.                         }
  1325.                         if (nc) {
  1326.                           char *a;
  1327.                           a=adr+nc;
  1328.                           while(is_realspace(*a)) a++;
  1329.                           if ((*a == expected) || (!expected)) {
  1330.                             if (expected)
  1331.                               a++;
  1332.                             while(is_realspace(*a)) a++;
  1333.                             if ((*a==34) || (*a=='\'') || (can_avoid_quotes)) {
  1334.                               char *b,*c;
  1335.                               int ndelim=1;
  1336.                               if ((*a==34) || (*a=='\''))
  1337.                                 a++;
  1338.                               else
  1339.                                 ndelim=0;
  1340.                               b=a;
  1341.                               if (ndelim) {
  1342.                                 while((*b!=34) && (*b!='\'') && (*b!='\0')) b++;
  1343.                               }
  1344.                               else {
  1345.                                 while((*b != quotes_replacement) && (*b!='\0')) b++;
  1346.                               }
  1347.                               c=b--; c+=ndelim;
  1348.                               while(*c==' ') c++;
  1349.                               if ((strchr(expected_end,*c)) || (*c=='\n') || (*c=='\r')) {
  1350.                                 c-=(ndelim+1);
  1351.                                 if ((int) (c - a + 1)) {
  1352.                                   if (ensure_not_mime) {
  1353.                                     int i = 0;
  1354.                                     while(a != NULL && hts_main_mime[i] != NULL && hts_main_mime[i][0] != '\0') {
  1355.                                       int p;
  1356.                                       if ((p=strfield(a, hts_main_mime[i])) && a[p] == '/') {
  1357.                                         a=NULL;
  1358.                                       }
  1359.                                       i++;
  1360.                                     }
  1361.                                   }
  1362.                                   if (a != NULL) {
  1363.                                     if ((opt->debug>1) && (opt->log!=NULL)) {
  1364.                                       char str[512];
  1365.                                       str[0]='\0';
  1366.                                       strncatbuff(str,a,minimum((int) (c - a + 1),32));
  1367.                                       fspc(opt->log,"debug"); fprintf(opt->log,"link detected in javascript: %s"LF,str); test_flush;
  1368.                                     }
  1369.                                     p=(int) (a - adr);    // p non nul: TRAITER CHAINE COMME FICHIER
  1370.                                     if (can_avoid_quotes) {
  1371.                                       ending_p=quotes_replacement;
  1372.                                     }
  1373.                                   }
  1374.                                 }
  1375.                               }
  1376.  
  1377.  
  1378.                             }
  1379.                           }
  1380.                         }
  1381.  
  1382.                     }
  1383.                 }
  1384.               }
  1385.  
  1386.             } else {      // ptr == 0
  1387.               //p=rech_tageq(adr,"primary");    // lien primaire, yeah
  1388.               p=0;          // No stupid tag anymore, raw link
  1389.               valid_p=1;    // Valid even if p==0
  1390.               while ((adr[p] == '\r') || (adr[p] == '\n'))
  1391.                 p++;
  1392.               //can_avoid_quotes=1;
  1393.               ending_p='\r';
  1394.             }       
  1395.  
  1396.           } else if (isspace((unsigned char)*adr)) {
  1397.             intag_startattr=adr+1;        // attribute in tag (for dirty parsing)
  1398.           }
  1399.  
  1400.  
  1401.           // ------------------------------------------------------------
  1402.           // last resort - "dirty" parsing: systematic detection of .gif, etc.
  1403.           // risk: generating spurious junk files
  1404.           // fix: no longer parses inside comments
  1405.           // ------------------------------------------------------------
  1406.           if ( (opt->parseall) && (ptr>0) && (!in_media) /* && (!inscript_in_comments)*/ ) {   // option parsing "brut"
  1407.             //int incomment_justquit=0;
  1408.             if (!is_realspace(*adr)) {
  1409.               int noparse=0;
  1410.  
  1411.               // Gestion des /* */
  1412. #if 0
  1413.               if (inscript) {
  1414.                 if (parseall_incomment) {
  1415.                   if ((*adr=='/') && (*(adr-1)=='*'))
  1416.                     parseall_incomment=0;
  1417.                   incomment_justquit=1;       // ne pas noter dernier caractΦre
  1418.                 } else {
  1419.                   if ((*adr=='/') && (*(adr+1)=='*'))
  1420.                     parseall_incomment=1;
  1421.                 }
  1422.               } else
  1423.                 parseall_incomment=0;
  1424. #endif
  1425.               /* ensure automate state  0 (not in comments, quotes..) */
  1426.               if (inscript && ( 
  1427.                 inscript_state_pos != INSCRIPT_INQUOTE && inscript_state_pos != INSCRIPT_INQUOTE2
  1428.                 ) ) {
  1429.                   noparse=1;
  1430.                 }
  1431.  
  1432.                 /* make sure we are not inside a plain <!-- --> comment */
  1433.                 if ( (!intag) && (incomment) && (!inscript))
  1434.                   noparse=1;        /* commentaire */
  1435.  
  1436.                 // recherche d'URLs
  1437.                 if (!noparse) {
  1438.                   //if ((!parseall_incomment) && (!noparse)) {
  1439.                   if (!p) {                   // non dΘja trouvΘ
  1440.                     if (adr != r->adr) {     // >1 caractΦre
  1441.                       // scanner les chaines
  1442.                       if ((*adr == '\"') || (*adr=='\'')) {         // "xx.gif" 'xx.gif'
  1443.                         if (strchr("=(,",parseall_lastc)) {    // exemple: a="img.gif.. (handles comments)
  1444.                           char *a=adr;
  1445.                           char stop=*adr;  // " ou '
  1446.                           int count=0;
  1447.  
  1448.                           // sauter caractΦres
  1449.                           a++;
  1450.                           // copier
  1451.                           while((*a) && (*a!='\'') && (*a!='\"') && (count<HTS_URLMAXSIZE)) { count++; a++; }
  1452.  
  1453.                           // ok chaine terminΘe par " ou '
  1454.                           if ((*a == stop) && (count<HTS_URLMAXSIZE) && (count>0)) {
  1455.                             char c;
  1456.                             char* aend;
  1457.                             //
  1458.                             aend=a;     // sauver dΘbut
  1459.                             a++;
  1460.                             while(is_taborspace(*a)) a++;
  1461.                             c=*a;
  1462.                             if (strchr("),;>/+\r\n",c)) {     // exemple: ..img.gif";
  1463.                               // le / est pour funct("img.gif" /* URL */);
  1464.                               char BIGSTK tempo[HTS_URLMAXSIZE*2];
  1465.                               char type[256];
  1466.                               int url_ok=0;      // url valide?
  1467.                               tempo[0]='\0'; type[0]='\0';
  1468.                               //
  1469.                               strncatbuff(tempo,adr+1,count);
  1470.                               //
  1471.                               if ((!strchr(tempo,' ')) || inscript) {   // espace dedans: mΘfiance! (sauf dans code javascript)
  1472.                                 int invalid_url=0;
  1473.  
  1474.                                 // escape                              
  1475.                                 unescape_amp(tempo);
  1476.  
  1477.                                 // Couper au # ou ? Θventuel
  1478.                                 {
  1479.                                   char* a=strchr(tempo,'#');
  1480.                                   if (a)
  1481.                                     *a='\0';
  1482.                                   a=strchr(tempo,'?');
  1483.                                   if (a)
  1484.                                     *a='\0';
  1485.                                 }
  1486.  
  1487.                                 // vΘrifier qu'il n'y a pas de caractΦres spΘciaux
  1488.                                 if (!strnotempty(tempo))
  1489.                                   invalid_url=1;
  1490.                                 else if (strchr(tempo,'*')
  1491.                                   || strchr(tempo,'<')
  1492.                                   || strchr(tempo,'>')
  1493.                                   || strchr(tempo,',')    /* list of files ? */
  1494.                                   || strchr(tempo,'\"')    /* potential parsing bug */
  1495.                                   || strchr(tempo,'\'')    /* potential parsing bug */
  1496.                                   )
  1497.                                   invalid_url=1;
  1498.                                 else if (tempo[0] == '.' && isalnum(tempo[1]))   // ".gif"
  1499.                                   invalid_url=1;
  1500.  
  1501.                                 /* non invalide? */
  1502.                                 if (!invalid_url) {
  1503.                                   // Un plus α la fin? Alors ne pas prendre sauf si extension ("/toto.html#"+tag)
  1504.                                   if (c!='+') {    // PAS de plus α la fin
  1505. #if 0
  1506.                                     char* a;
  1507. #endif
  1508.                                     // "Comparisons of scheme names MUST be case-insensitive" (RFC2616)                                  
  1509.                                     //if ((strncmp(tempo,"http://",7)==0) || (strncmp(tempo,"ftp://",6)==0))  // ok pas de problΦme
  1510.                                     if (
  1511.                                       (strfield(tempo,"http:")) 
  1512.                                       || (strfield(tempo,"ftp:"))
  1513. #if HTS_USEOPENSSL
  1514.                                       || (
  1515.                                       SSL_is_available &&
  1516.                                       (strfield(tempo,"https:"))
  1517.                                       )
  1518. #endif
  1519.                                       )  // ok pas de problΦme
  1520.                                       url_ok=1;
  1521.                                     else if (tempo[strlen(tempo)-1]=='/') {        // un slash: ok..
  1522.                                       if (inscript)   // sinon si pas javascript, mΘfiance (rΘpertoire style base?)
  1523.                                         url_ok=1;
  1524.                                     } 
  1525. #if 0
  1526.                                     else if ((a=strchr(tempo,'/'))) {        // un slash: ok..
  1527.                                       if (inscript) {    // sinon si pas javascript, mΘfiance (style "text/css")
  1528.                                         if (strchr(a+1,'/'))     // un seul / : abandon (STYLE type='text/css')
  1529.                                           if (!strchr(tempo,' '))  // avoid spaces (too dangerous for comments)
  1530.                                             url_ok=1;
  1531.                                       }
  1532.                                     }
  1533. #endif
  1534.                                   }
  1535.                                   // Prendre si extension reconnue
  1536.                                   if (!url_ok) {
  1537.                                     get_httptype(type,tempo,0);
  1538.                                     if (strnotempty(type))     // type reconnu!
  1539.                                       url_ok=1;
  1540.                                     else if (is_dyntype(get_ext(tempo)))  // reconnu php,cgi,asp..
  1541.                                       url_ok=1;
  1542.                                     // MAIS pas les foobar@aol.com !!
  1543.                                     if (strchr(tempo,'@'))
  1544.                                       url_ok=0;
  1545.                                   }
  1546.                                   //
  1547.                                   // Ok, cela pourrait Ωtre une URL
  1548.                                   if (url_ok) {
  1549.  
  1550.                                     // Check if not forbidden tag (id,name..)
  1551.                                     if (intag_start_valid) {
  1552.                                       if (intag_start)
  1553.                                         if (intag_startattr)
  1554.                                           if (intag)
  1555.                                             if (!inscript)
  1556.                                               if (!incomment) {
  1557.                                                 int i=0,nop=0;
  1558.                                                 while( (nop==0) && (strnotempty(hts_nodetect[i])) ) {
  1559.                                                   nop=rech_tageq(intag_startattr,hts_nodetect[i]);
  1560.                                                   i++;
  1561.                                                 }
  1562.                                                 // Forbidden tag
  1563.                                                 if (nop) {
  1564.                                                   url_ok=0;
  1565.                                                   if ((opt->debug>1) && (opt->log!=NULL)) {
  1566.                                                     fspc(opt->log,"debug"); fprintf(opt->log,"dirty parsing: bad tag avoided: %s"LF,hts_nodetect[i-1]); test_flush;
  1567.                                                   }
  1568.                                                 }
  1569.                                               }
  1570.                                     }
  1571.  
  1572.  
  1573.                                     // Accepter URL, on la traitera comme une URL normale!!
  1574.                                     if (url_ok) {
  1575.                                       valid_p = 1;
  1576.                                       p = 0;
  1577.                                     }
  1578.  
  1579.                                   }
  1580.                                 }
  1581.                               }
  1582.                             }
  1583.                           }
  1584.                         }
  1585.                       }
  1586.                     }
  1587.                   }  // p == 0               
  1588.  
  1589.                 } // not in comment
  1590.  
  1591.                 // plus dans un commentaire
  1592.                 if ( inscript_state_pos == INSCRIPT_START 
  1593.                   && inscript_state_pos_prev == INSCRIPT_START) {
  1594.                     parseall_lastc=*adr;             // caractΦre avant le prochain
  1595.                   }
  1596.  
  1597.  
  1598.             }  // if realspace
  1599.           }  // if parseall
  1600.  
  1601.  
  1602.           // ------------------------------------------------------------
  1603.           // p!=0 : a possible link has been spotted
  1604.           // ------------------------------------------------------------
  1605.           //
  1606.           if ((p>0) || (valid_p)) {    // on a repΘrΘ un lien
  1607.             //int lien_valide=0;
  1608.             char* eadr=NULL;          /* fin de l'URL */
  1609.             char* quote_adr=NULL;     /* adresse du ? dans l'adresse */
  1610.             int ok=1;
  1611.             char quote='\0';
  1612.             int quoteinscript=0;
  1613.             int  noquote=0;
  1614.  
  1615.             // if nofollow is set or a stop has been triggered, rewrite all links as external
  1616.             if ((nofollow) || (opt->state.stop))
  1617.               p_nocatch=1;
  1618.  
  1619.             // write codebase first, flush before code
  1620.             if ((p_type==-1) || (p_type==-2)) {
  1621.               if ((opt->getmode & 1) && (ptr>0)) {
  1622.                 HT_ADD_ADR;    // refresh
  1623.               }
  1624.               lastsaved=adr;    // dernier Θcrit+1
  1625.             }
  1626.  
  1627.             // sauter espaces
  1628.             // adr+=p;
  1629.             INCREMENT_CURRENT_ADR(p);
  1630.             while( ( is_space(*adr) || (
  1631.               inscriptgen 
  1632.               && adr[0] == '\\' 
  1633.               && is_space(adr[1])
  1634.               )
  1635.               )
  1636.               && quote == '\0'
  1637.               ) {
  1638.                 if (!quote)
  1639.                   if ((*adr=='\"') || (*adr=='\'')) {
  1640.                     quote=*adr;                     // on doit attendre cela α la fin
  1641.                     if (inscriptgen && *(adr - 1) == '\\') {
  1642.                       quoteinscript=1;  /* will wait for \" */
  1643.                     }
  1644.                   }
  1645.                   // puis quitter
  1646.                   // adr++;    // sauter les espaces, "" et cie
  1647.                   INCREMENT_CURRENT_ADR(1);
  1648.               }
  1649.  
  1650.               /* Stop at \n (LF) if primary links or link lists */
  1651.               if (ptr == 0 || (in_media && strcmp(in_media,"LNK")==0))
  1652.                 quote='\n';
  1653.               /* s'arrΩter que ce soit un ' ou un " : pour document.write('<img src="foo'+a); par exemple! */
  1654.               else if (inscript && ! unquoted_script)
  1655.                 noquote=1;
  1656.  
  1657.               // sauter Θventuel \" ou \' javascript
  1658.               if (inscript) {    // on est dans un obj.write("..
  1659.                 if (*adr=='\\') {
  1660.                   if ((*(adr+1)=='\'') || (*(adr+1)=='"')) {  // \" ou \'
  1661.                     // adr+=2;    // sauter
  1662.                     INCREMENT_CURRENT_ADR(2);
  1663.                   }
  1664.                 }
  1665.               }
  1666.  
  1667.               // sauter content="1;URL=http://..
  1668.               if (p_searchMETAURL) {
  1669.                 int l=0;
  1670.                 while(
  1671.                   (adr + l + 4 < r->adr + r->size)
  1672.                   && (!strfield(adr+l,"URL=")) 
  1673.                   && (l<128) ) l++;
  1674.                 if (!strfield(adr+l,"URL="))
  1675.                   ok=-1;
  1676.                 else
  1677.                   adr+=(l+4);
  1678.               }
  1679.  
  1680.               /* do not take javascript:document.location=.. as a link: parse it instead */
  1681.               if (ok!=-1) {
  1682.                 if (strfield(adr,"javascript:") 
  1683.                   && ! inscript       /* we don't want to parse 'javascript:' inside document.write inside scripts */
  1684.                   ) {
  1685.                     ok=-1;
  1686.                     /*
  1687.                     On est dΘsormais dans du code javascript
  1688.                     */
  1689.                     inscript_name="";
  1690.                     inscript_tag=inscript=1;
  1691.                     inscript_state_pos=INSCRIPT_START;
  1692.                     inscript_tag_lastc=quote;     /* α attendre α la fin */
  1693.                     if (opt->parsedebug) { HT_ADD("<@@ inscript @@>"); }
  1694.                   }
  1695.               }
  1696.  
  1697.               if (p_type==1) {
  1698.                 if (*adr=='#') {
  1699.                   adr++;           // sauter # pour usemap etc
  1700.                 }
  1701.               }
  1702.               eadr=adr;
  1703.  
  1704.               // do not flush after code if the codebase must be written first!
  1705.               if ((p_type!=-1) && (p_type!=2) && (p_type!=-2)) {
  1706.                 if ((opt->getmode & 1) && (ptr>0)) {
  1707.                   HT_ADD_ADR;    // refresh
  1708.                 }
  1709.                 lastsaved=adr;    // dernier Θcrit+1
  1710.                 // aprΦs on Θcrira soit les donnΘes initiales,
  1711.                 // soir une URL/lien modifiΘ!
  1712.               } else if (p_type==-1) p_flush=adr;    // flusher jusqu'α adr ensuite
  1713.  
  1714.               if (ok!=-1) {    // continuer
  1715.                 // dΘcouper le lien
  1716.                 do {
  1717.                   if ((* (unsigned char*) eadr)<32) {   // caractΦre de contr⌠le (ou \0)
  1718.                     if (!is_space(*eadr))
  1719.                       ok=0; 
  1720.                   }
  1721.                   if ( ( ((int) (eadr - adr)) ) > HTS_URLMAXSIZE)  // ** trop long, >HTS_URLMAXSIZE caractΦres (on prΘvoit HTS_URLMAXSIZE autres pour path)
  1722.                     ok=-1;    // ne pas traiter ce lien
  1723.  
  1724.                   if (ok > 0) {
  1725.                     //if (*eadr!=' ') {  
  1726.                     if (is_space(*eadr)) {   // guillemets,CR, etc
  1727.                       if ( 
  1728.                         ( *eadr == quote && ( !quoteinscript || *(eadr -1) == '\\') )  // end quote
  1729.                         || ( noquote && (*eadr == '\"' || *eadr == '\'') )       // end at any quote
  1730.                         || (!noquote && quote == '\0' && is_realspace(*eadr) )   // unquoted href
  1731.                         )     // si pas d'attente de quote spΘciale ou si quote atteinte
  1732.                         ok=0; 
  1733.                     } else if (ending_p && (*eadr==ending_p))
  1734.                       ok=0;
  1735.                     else {
  1736.                       switch(*eadr) {
  1737.                     case '>': 
  1738.                       if (!quote) {
  1739.                         if (!inscript && !in_media) {
  1740.                           intag=0;    // PLUS dans un tag!
  1741.                           intag_start_valid=0;
  1742.                         }
  1743.                         ok=0;
  1744.                       }
  1745.                       break;
  1746.                       /*case '<':*/ 
  1747.                     case '#': 
  1748.                       if (*(eadr-1) != '&')       // (
  1749.                         ok=0; 
  1750.                       break;
  1751.                       // case '?': non!
  1752.                     case '\\': if (inscript) ok=0; break;     // \" ou \' point d'arrΩt
  1753.                     case '?': quote_adr=adr; break;           // noter position query
  1754.                       }
  1755.                     }
  1756.                     //}
  1757.                   } 
  1758.                   eadr++;
  1759.                 } while(ok==1);
  1760.  
  1761.                 // Empty link detected
  1762.                 if ( (((int) (eadr - adr))) <= 1) {       // link empty
  1763.                   ok=-1;        // No
  1764.                   if (*adr != '#') {        // Not empty+unique #
  1765.                     if ( (((int) (eadr - adr)) == 1)) {       // 1=link empty with delim (end_adr-start_adr)
  1766.                       if (quote) {
  1767.                         if ((opt->getmode & 1) && (ptr>0)) { 
  1768.                           HT_ADD("#");        // We add this for a <href="">
  1769.                         }
  1770.                       }
  1771.                     }
  1772.                   }
  1773.                 }
  1774.  
  1775.                 // This is a dirty and horrible hack to avoid parsing an Adobe GoLive bogus tag
  1776.                 if (strfield(adr, "(Empty Reference!)")) {
  1777.                   ok=-1;        // No
  1778.                 }
  1779.  
  1780.               }
  1781.  
  1782.               if (ok==0) {    // tester un lien
  1783.                 char BIGSTK lien[HTS_URLMAXSIZE*2];
  1784.                 int meme_adresse=0;      // 0 par dΘfaut pour primary
  1785.                 //char *copie_de_adr=adr;
  1786.                 //char* p;
  1787.  
  1788.                 // construire lien (dΘcoupage)
  1789.                 if ( (((int) (eadr -  adr))-1) < HTS_URLMAXSIZE  ) {    // pas trop long?
  1790.                   strncpy(lien,adr,((int) (eadr - adr))-1);
  1791.                   *(lien+  (((int) (eadr -  adr)))-1  )='\0';
  1792.                   //printf("link: %s\n",lien);          
  1793.                   // supprimer les espaces
  1794.                   while((lien[strlen(lien)-1]==' ') && (strnotempty(lien))) lien[strlen(lien)-1]='\0';
  1795.  
  1796.  
  1797.                 } else
  1798.                   lien[0]='\0';    // erreur
  1799.  
  1800.  
  1801.                 // ------------------------------------------------------
  1802.                 // Link spotted and extracted
  1803.                 if (strnotempty(lien)>0) {           // construction du lien
  1804.                   char BIGSTK adr[HTS_URLMAXSIZE*2],fil[HTS_URLMAXSIZE*2];          // ATTENTION adr cache le "vrai" adr
  1805.                   int forbidden_url=-1;              // lien non interdit (mais non autorisΘ..)
  1806.                   int just_test_it=0;                // mode de test des liens
  1807.                   int set_prio_to=0;                 // pour capture de page isolΘe
  1808.                   int import_done=0;                 // lien importΘ (ne pas scanner ensuite *α priori*)
  1809.                   //
  1810.                   adr[0]='\0'; fil[0]='\0';
  1811.                   //
  1812.                   // 0: autorisΘ
  1813.                   // 1: interdit (patcher tout de mΩme adresse)
  1814.  
  1815.                   if ((opt->debug>1) && (opt->log!=NULL)) {
  1816.                     fspc(opt->log,"debug"); fprintf(opt->log,"link detected in html: %s"LF,lien); test_flush;
  1817.                   }
  1818.  
  1819.                   // external check
  1820. #if HTS_ANALYSTE
  1821.                   if (!hts_htmlcheck_linkdetected(lien) || !hts_htmlcheck_linkdetected2(lien, intag_start)) {
  1822.                     error=1;    // erreur
  1823.                     if (opt->errlog) {
  1824.                       fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s refused by external wrapper"LF,lien);
  1825.                       test_flush;
  1826.                     }
  1827.                   }
  1828. #endif
  1829.  
  1830. #if HTS_STRIP_DOUBLE_SLASH
  1831.                   // supprimer les // en / (sauf pour http://)
  1832.                   if (opt->urlhack) {
  1833.                     char *a,*p,*q;
  1834.                     int done=0;
  1835.                     a=strchr(lien,':');    // http://
  1836.                     if (a) {
  1837.                       a++;
  1838.                       while(*a=='/') a++;    // position aprΦs http://
  1839.                     } else {
  1840.                       a=lien;                // dΘbut
  1841.                       while(*a=='/') a++;    // position aprΦs http://
  1842.                     }
  1843.                     q=strchr(a,'?');     // ne pas traiter aprΦs '?'
  1844.                     if (!q)
  1845.                       q=a+strlen(a)-1;
  1846.                     while(( p=strstr(a,"//")) && (!done) ) {    // remplacer // par /
  1847.                       if ((int) p>(int) q) {   // aprΦs le ? (toto.cgi?param=1//2.3)
  1848.                         done=1;    // stopper
  1849.                       } else {
  1850.                         char BIGSTK tempo[HTS_URLMAXSIZE*2];
  1851.                         tempo[0]='\0';
  1852.                         strncatbuff(tempo,a,(int) p - (int) a);
  1853.                         strcatbuff (tempo,p+1);
  1854.                         strcpybuff(a,tempo);    // recopier
  1855.                       }
  1856.                     }
  1857.                   }
  1858. #endif
  1859.  
  1860.                   // strip leading and trailing spaces, and leftover CR,LF
  1861.                   // (IMG SRC="foo.<\n><\t>gif<\t>")
  1862.                   {
  1863.                     char* a = lien;
  1864.                     int llen;
  1865.  
  1866.                     // strip ending spaces
  1867.                     llen = ( *a != '\0' ) ? strlen(a) : 0;
  1868.                     while(llen > 0 && is_realspace(lien[llen - 1]) ) {
  1869.                       a[--llen]='\0';
  1870.                     } 
  1871.                     //  skip leading ones
  1872.                     while(is_realspace(*a)) a++;
  1873.                     // strip cr, lf, tab inside URL
  1874.                     llen = 0;
  1875.                     while(*a) {
  1876.                       if (*a != '\n' && *a != '\r' && *a != '\t') {
  1877.                         lien[llen++] = *a;
  1878.                       }
  1879.                       a++;
  1880.                     }
  1881.                     lien[llen] = '\0';
  1882.                   }
  1883.  
  1884.                   // commas are forbidden
  1885.                   if (archivetag_p) {
  1886.                     if (strchr(lien, ',')) {
  1887.                       error=1;    // erreur
  1888.                       if ((opt->debug>1) && (opt->log!=NULL)) {
  1889.                         fspc(opt->log,"debug"); fprintf(opt->log,"link rejected (multiple-archive) %s"LF,lien); test_flush;
  1890.                       }
  1891.                     }
  1892.                   }               
  1893.  
  1894.                   /* Unescape/escape %20 and other   */
  1895.                   {
  1896.                     char BIGSTK query[HTS_URLMAXSIZE*2];
  1897.                     char* a=strchr(lien,'?');
  1898.                     if (a) {
  1899.                       strcpybuff(query,a);
  1900.                       *a='\0';
  1901.                     } else
  1902.                       query[0]='\0';
  1903.                     // convert &amp; -> & and other niceties
  1904.                     unescape_amp(lien);
  1905.                     unescape_amp(query);
  1906.                     // decode needlessly escaped characters (%2E for example) and encode spaces
  1907.                     // XXXXXXXXXXXXXXXXX strcpybuff(lien,unescape_http(lien));
  1908.                     strcpybuff(lien,unescape_http_unharm(lien, (no_esc_utf)?0:1));
  1909.                     escape_remove_control(lien);
  1910.                     escape_spc_url(lien);
  1911.                     strcatbuff(lien,query);     /* restore */
  1912.                   }
  1913.  
  1914.                   // convertir les Θventuels \ en des / pour Θviter des problΦmes de reconnaissance!
  1915.                   {
  1916.                     char* a;
  1917.                     for(a = jump_identification(lien) ; *a != '\0' && *a != '?' ; a++) {
  1918.                       if (*a == '\\') {
  1919.                         *a = '/';
  1920.                       }
  1921.                     }
  1922.                   }
  1923.  
  1924.                   // supprimer le(s) ./
  1925.                   while ((lien[0]=='.') && (lien[1]=='/')) {
  1926.                     char BIGSTK tempo[HTS_URLMAXSIZE*2];
  1927.                     strcpybuff(tempo,lien+2);
  1928.                     strcpybuff(lien,tempo);
  1929.                   }
  1930.                   if (strnotempty(lien)==0)  // sauf si plus de nom de fichier
  1931.                     strcpybuff(lien,"./");
  1932.  
  1933.                   // vΘrifie les /~machin -> /~machin/
  1934.                   // supposition dangereuse?
  1935.                   // OUI!!
  1936. #if HTS_TILDE_SLASH
  1937.                   if (lien[strlen(lien)-1]!='/') {
  1938.                     char *a=lien+strlen(lien)-1;
  1939.                     // Θviter aussi index~1.html
  1940.                     while (((int) a>(int) lien) && (*a!='~') && (*a!='/') && (*a!='.')) a--;
  1941.                     if (*a=='~') {
  1942.                       strcatbuff(lien,"/");    // ajouter slash
  1943.                     }
  1944.                   }
  1945. #endif
  1946.  
  1947.                   // APPLET CODE="mixer.MixerApplet.class" --> APPLET CODE="mixer/MixerApplet.class"
  1948.                   // yes, this is dirty
  1949.                   // but I'm so lazzy..
  1950.                   // and besides the java "code" convention is really a pain in html code
  1951.                   if (p_type==-1) {
  1952.                     char* a=strrchr(lien,'.');
  1953.                     add_class_dots_to_patch=0;
  1954.                     if (a) {
  1955.                       char* b;
  1956.                       do {
  1957.                         b=strchr(lien,'.');
  1958.                         if ((b != a) && (b)) {
  1959.                           add_class_dots_to_patch++;
  1960.                           *b='/';
  1961.                         }
  1962.                       } while((b != a) && (b));
  1963.                     }
  1964.                   }
  1965.  
  1966.                   // Θliminer les Θventuels :80 (port par dΘfaut!)
  1967.                   if (link_has_authority(lien)) {
  1968.                     char * a;
  1969.                     a=strstr(lien,"//");    // "//" authority
  1970.                     if (a)
  1971.                       a+=2;
  1972.                     else
  1973.                       a=lien;
  1974.                     // while((*a) && (*a!='/') && (*a!=':')) a++;
  1975.                     a=jump_toport(a);
  1976.                     if (a) {  // port
  1977.                       int port=0;
  1978.                       int defport=80;
  1979.                       char* b=a+1;
  1980. #if HTS_USEOPENSSL
  1981.                       // FIXME
  1982.                       //if (strfield(adr, "https:")) {
  1983.                       //}
  1984. #endif
  1985.                       while(isdigit((unsigned char)*b)) { port*=10; port+=(int) (*b-'0'); b++; }
  1986.                       if (port==defport) {  // port 80, default - c'est dΘbile
  1987.                         char BIGSTK tempo[HTS_URLMAXSIZE*2];
  1988.                         tempo[0]='\0';
  1989.                         strncatbuff(tempo,lien,(int) (a - lien));
  1990.                         strcatbuff(tempo,a+3);  // sauter :80
  1991.                         strcpybuff(lien,tempo);
  1992.                       }
  1993.                     }
  1994.                   }
  1995.  
  1996.                   // filtrer les parazites (mailto & cie)
  1997.                   /*
  1998.                   if (strfield(lien,"mailto:")) {  // ne pas traiter
  1999.                   error=1;
  2000.                   } else if (strfield(lien,"news:")) {  // ne pas traiter
  2001.                   error=1;
  2002.                   }
  2003.                   */
  2004.  
  2005.                   // vΘrifier que l'on ne doit pas ajouter de .class
  2006.                   if (!error) {
  2007.                     if (add_class) {
  2008.                       char *a = lien+strlen(lien)-1;
  2009.                       while(( a > lien) && (*a!='/') && (*a!='.')) a--;
  2010.                       if (*a != '.')
  2011.                         strcatbuff(lien,".class");    // ajouter .class
  2012.                       else if (!strfield2(a,".class"))
  2013.                         strcatbuff(lien,".class");    // idem
  2014.                     }
  2015.                   }
  2016.  
  2017.                   // si c'est un chemin, alors vΘrifier (toto/toto.html -> http://www/toto/)
  2018.                   if (!error) {
  2019.                     if ((opt->debug>1) && (opt->log!=NULL)) {
  2020.                       fspc(opt->log,"debug"); fprintf(opt->log,"position link check %s"LF,lien); test_flush;
  2021.                     }
  2022.  
  2023.                     if ((p_type==2) || (p_type==-2)) {   // code ou codebase                        
  2024.                       // VΘrifier les codebase=applet (au lieu de applet/)
  2025.                       if (p_type==-2) {    // codebase
  2026.                         if (strnotempty(lien)) {
  2027.                           if (fil[strlen(lien)-1]!='/') {  // pas rΘpertoire
  2028.                             strcatbuff(lien,"/");
  2029.                           }
  2030.                         }
  2031.                       }
  2032.  
  2033.                       /* base has always authority */
  2034.                       if (p_type==2 && !link_has_authority(lien)) {
  2035.                         char BIGSTK tmp[HTS_URLMAXSIZE*2];
  2036.                         strcpybuff(tmp, "http://");
  2037.                         strcatbuff(tmp, lien);
  2038.                         strcpybuff(lien, tmp);
  2039.                       }
  2040.  
  2041.                       /* only one ending / (bug on some pages) */
  2042.                       if ((int)strlen(lien)>2) {
  2043.                         int len = (int) strlen(lien);
  2044.                         while(len > 1 && lien[len-1] == '/' && lien[len-2] == '/' )    /* double // (bug) */
  2045.                           lien[--len]='\0';
  2046.                       }
  2047.                       // copier nom host si besoin est
  2048.                       if (!link_has_authority(lien)) {  // pas de http://
  2049.                         char BIGSTK adr2[HTS_URLMAXSIZE*2],fil2[HTS_URLMAXSIZE*2];  // ** euh ident_url_relatif??
  2050.                         if (ident_url_relatif(lien,urladr,urlfil,adr2,fil2)<0) {                        
  2051.                           error=1;
  2052.                         } else {
  2053.                           strcpybuff(lien,"http://");
  2054.                           strcatbuff(lien,adr2);
  2055.                           if (*fil2!='/')
  2056.                             strcatbuff(lien,"/");
  2057.                           strcatbuff(lien,fil2);
  2058.                           {
  2059.                             char* a;
  2060.                             a=lien+strlen(lien)-1;
  2061.                             while((*a) && (*a!='/') && ( a> lien)) a--;
  2062.                             if (*a=='/') {
  2063.                               *(a+1)='\0';
  2064.                             }
  2065.                           }
  2066.                           //char BIGSTK tempo[HTS_URLMAXSIZE*2];
  2067.                           //strcpybuff(tempo,"http://");
  2068.                           //strcatbuff(tempo,urladr);    // host
  2069.                           //if (*lien!='/')
  2070.                           //  strcatbuff(tempo,"/");
  2071.                           //strcatbuff(tempo,lien);
  2072.                           //strcpybuff(lien,tempo);
  2073.                         }
  2074.                       }
  2075.  
  2076.                       if (!error) {  // pas d'erreur?
  2077.                         if (p_type==2) {   // code ET PAS codebase      
  2078.                           char* a=lien+strlen(lien)-1;
  2079.                           while( (a > lien) && (*a) && (*a!='/')) a--;
  2080.                           if (*a=='/')     // ok on a repΘrΘ le dernier /
  2081.                             *(a+1)='\0';   // couper
  2082.                           else {
  2083.                             *lien='\0';    // Θliminer
  2084.                             error=1;   // erreur, ne pas poursuivre
  2085.                           }      
  2086.                         }
  2087.  
  2088.                         // stocker base ou codebase?
  2089.                         switch(p_type) {
  2090.                       case 2: { 
  2091.                         //if (*lien!='/') strcatbuff(base,"/");
  2092.                         strcpybuff(base,lien);
  2093.                               }
  2094.                               break;      // base
  2095.                       case -2: {
  2096.                         //if (*lien!='/') strcatbuff(codebase,"/");
  2097.                         strcpybuff(codebase,lien); 
  2098.                                }
  2099.                                break;  // base
  2100.                         }
  2101.  
  2102.                         if ((opt->debug>1) && (opt->log!=NULL)) {
  2103.                           fspc(opt->log,"debug"); fprintf(opt->log,"code/codebase link %s base %s"LF,lien,base); test_flush;
  2104.                         }
  2105.                         //printf("base code: %s - %s\n",lien,base);
  2106.                       }
  2107.  
  2108.                     } else {
  2109.                       char* _base;
  2110.                       if (p_type==-1)   // code (applet)
  2111.                         _base=codebase;
  2112.                       else
  2113.                         _base=base;
  2114.  
  2115.  
  2116.                       // ajouter chemin de base href..
  2117.                       if (strnotempty(_base)) {       // considΘrer base
  2118.                         if (!link_has_authority(lien)) {    // non absolue
  2119.                           if (*lien!='/') {           // non absolu sur le site (/)
  2120.                             if ( ((int) strlen(_base)+(int) strlen(lien))<HTS_URLMAXSIZE) {
  2121.                               // mailto: and co: do NOT add base
  2122.                               if (ident_url_relatif(lien,urladr,urlfil,adr,fil)>=0) {
  2123.                                 char BIGSTK tempo[HTS_URLMAXSIZE*2];
  2124.                                 // base est absolue
  2125.                                 strcpybuff(tempo,_base);
  2126.                                 strcatbuff(tempo,lien + ((*lien=='/')?1:0) );
  2127.                                 strcpybuff(lien,tempo);        // patcher en considΘrant base
  2128.                                 // ** vΘrifier que ../ fonctionne (ne doit pas arriver mais bon..)
  2129.  
  2130.                                 if ((opt->debug>1) && (opt->log!=NULL)) {
  2131.                                   fspc(opt->log,"debug"); fprintf(opt->log,"link modified with code/codebase %s"LF,lien); test_flush;
  2132.                                 }
  2133.                               }
  2134.                             } else {
  2135.                               error=1;    // erreur
  2136.                               if (opt->errlog) {
  2137.                                 fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s too long with base href"LF,lien);
  2138.                                 test_flush;
  2139.                               }
  2140.                             }
  2141.                           } else {
  2142.                             char BIGSTK badr[HTS_URLMAXSIZE*2], bfil[HTS_URLMAXSIZE*2];
  2143.                             if (ident_url_absolute(_base, badr, bfil) >=0 ) {
  2144.                               if ( ((int) strlen(badr)+(int) strlen(lien)) < HTS_URLMAXSIZE) {
  2145.                                 char BIGSTK tempo[HTS_URLMAXSIZE*2];
  2146.                                 // base est absolue
  2147.                                 tempo[0] = '\0';
  2148.                                 if (!link_has_authority(badr)) {
  2149.                                   strcatbuff(tempo, "http://");
  2150.                                 }
  2151.                                 strcatbuff(tempo,badr);
  2152.                                 strcatbuff(tempo,lien);
  2153.                                 strcpybuff(lien,tempo);        // patcher en considΘrant base
  2154.  
  2155.                                 if ((opt->debug>1) && (opt->log!=NULL)) {
  2156.                                   fspc(opt->log,"debug"); fprintf(opt->log,"link modified with code/codebase %s"LF,lien); test_flush;
  2157.                                 }
  2158.                               } else {
  2159.                                 error=1;    // erreur
  2160.                                 if (opt->errlog) {
  2161.                                   fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s too long with base href"LF,lien);
  2162.                                   test_flush;
  2163.                                 }
  2164.                               }
  2165.                             }
  2166.                           }
  2167.                         }
  2168.                       }
  2169.  
  2170.  
  2171.                     }
  2172.                   }
  2173.  
  2174.  
  2175.                   // transformer lien quelconque (http, relatif, etc) en une adresse
  2176.                   // et un chemin+fichier (adr,fil)
  2177.                   if (!error) {
  2178.                     int reponse;
  2179.                     if ((opt->debug>1) && (opt->log!=NULL)) {
  2180.                       fspc(opt->log,"debug"); fprintf(opt->log,"build relative link %s with %s%s"LF,lien,relativeurladr,relativeurlfil); test_flush;
  2181.                     }
  2182.                     if ((reponse=ident_url_relatif(lien,relativeurladr,relativeurlfil,adr,fil))<0) {                        
  2183.                       adr[0]='\0';    // erreur
  2184.                       if (reponse==-2) {
  2185.                         if (opt->errlog) {
  2186.                           fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s not caught (unknown protocol)"LF,lien);
  2187.                           test_flush;
  2188.                         }
  2189.                       } else {
  2190.                         if ((opt->debug>1) && (opt->errlog!=NULL)) {
  2191.                           fspc(opt->errlog,"debug"); fprintf(opt->errlog,"ident_url_relatif failed for %s with %s%s"LF,lien,relativeurladr,relativeurlfil); test_flush;
  2192.                         }
  2193.                       }
  2194.                     } else {
  2195.                       if ((opt->debug>1) && (opt->log!=NULL)) {
  2196.                         fspc(opt->log,"debug"); fprintf(opt->log,"built relative link %s with %s%s -> %s%s"LF,lien,relativeurladr,relativeurlfil,adr,fil); test_flush;
  2197.                       }
  2198.                     }
  2199.                   } else {
  2200.                     if ((opt->debug>1) && (opt->log!=NULL)) {
  2201.                       fspc(opt->log,"debug"); fprintf(opt->log,"link %s not build, error detected before"LF,lien); test_flush;
  2202.                     }
  2203.                     adr[0]='\0';
  2204.                   }
  2205.  
  2206. #if HTS_CHECK_STRANGEDIR
  2207.                   // !ATTENTION!
  2208.                   // Ici on teste les exotiques du genre www.truc.fr/machin (sans slash α la fin)
  2209.                   // je n'ai pas encore trouvΘ le moyen de faire la diffΘrence entre un rΘpertoire
  2210.                   // et un fichier en http A PRIORI : je fais donc un test
  2211.                   // En cas de moved xxx, on recalcule adr et fil, tout simplement
  2212.                   // DEFAUT: test effectuΘ plusieurs fois! α revoir!!!
  2213.                   if ((adr[0]!='\0') && (strcmp(adr,"file://") && (p_type!=2) && (p_type!=-2)) {
  2214.                     //## if ((adr[0]!='\0') && (adr[0]!=lOCAL_CHAR) && (p_type!=2) && (p_type!=-2)) {
  2215.                     if (fil[strlen(fil)-1]!='/') {  // pas rΘpertoire
  2216.                       if (ishtml(fil)==-2) {    // pas d'extension
  2217.                         char BIGSTK loc[HTS_URLMAXSIZE*2];  // Θventuelle nouvelle position
  2218.                         loc[0]='\0';
  2219.                         if ((opt->debug>1) && (opt->log!=NULL)) {
  2220.                           fspc(opt->log,"debug"); fprintf(opt->log,"link-check-directory: %s%s"LF,adr,fil);
  2221.                           test_flush;
  2222.                         }
  2223.  
  2224.                         // tester Θventuelle nouvelle position
  2225.                         switch (http_location(adr,fil,loc).statuscode) {
  2226.                       case 200: // ok au final
  2227.                         if (strnotempty(loc)) {  // a changΘ d'adresse
  2228.                           if (opt->errlog) {
  2229.                             fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s%s has moved to %s for %s%s"LF,adr,fil,loc,urladr,urlfil);
  2230.                             test_flush;
  2231.                           }
  2232.  
  2233.                           // recalculer adr et fil!
  2234.                           if (ident_url_absolute(loc,adr,fil)==-1) {
  2235.                             adr[0]='\0';  // cancel
  2236.                             if ((opt->debug>1) && (opt->log!=NULL)) {
  2237.                               fspc(opt->log,"debug"); fprintf(opt->log,"link-check-dir: %s%s"LF,adr,fil);
  2238.                               test_flush;
  2239.                             }
  2240.                           }
  2241.  
  2242.                         }
  2243.                         break;
  2244.                       case -2: case -3:  // timeout ou erreur grave
  2245.                         if (opt->errlog) {
  2246.                           fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Connection too slow for testing link %s%s (from %s%s)"LF,adr,fil,urladr,urlfil);
  2247.                           test_flush;
  2248.                         }
  2249.  
  2250.                         break;
  2251.                         }
  2252.  
  2253.                       }
  2254.                     } 
  2255.                   }
  2256. #endif
  2257.  
  2258.                   // Le lien doit juste Ωtre rΘΘcrit, mais ne doit pas gΘnΘrer un lien
  2259.                   // exemple: <FORM ACTION="url_cgi">
  2260.                   if (p_nocatch) {
  2261.                     forbidden_url=1;    // interdire rΘcupΘration du lien
  2262.                     if ((opt->debug>1) && (opt->log!=NULL)) {
  2263.                       fspc(opt->log,"debug"); fprintf(opt->log,"link forced external at %s%s"LF,adr,fil);
  2264.                       test_flush;
  2265.                     }
  2266.                   }
  2267.  
  2268.                   // Tester si un lien doit Ωtre acceptΘ ou refusΘ (wizard)
  2269.                   // forbidden_url=1 : lien refusΘ
  2270.                   // forbidden_url=0 : lien acceptΘ
  2271.                   //if ((ptr>0) && (p_type!=2) && (p_type!=-2)) {    // tester autorisations?
  2272.                   if ((p_type!=2) && (p_type!=-2)) {    // tester autorisations?
  2273.                     if (!p_nocatch) {
  2274.                       if (adr[0]!='\0') {          
  2275.                         if ((opt->debug>1) && (opt->log!=NULL)) {
  2276.                           fspc(opt->log,"debug"); fprintf(opt->log,"wizard link test at %s%s.."LF,adr,fil);
  2277.                           test_flush;
  2278.                         }
  2279.                         forbidden_url=hts_acceptlink(opt,ptr,lien_tot,liens,
  2280.                           adr,fil,
  2281.                           NULL, NULL,
  2282.                           &set_prio_to,
  2283.                           &just_test_it);
  2284.                         if ((opt->debug>1) && (opt->log!=NULL)) {
  2285.                           fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard link test: %d"LF,forbidden_url);
  2286.                           test_flush;
  2287.                         }
  2288.                       }
  2289.                     }
  2290.                   }
  2291.  
  2292.                   // calculer meme_adresse
  2293.                   meme_adresse=strfield2(jump_identification(adr),jump_identification(urladr));
  2294.  
  2295.  
  2296.  
  2297.                   // DΘbut partie sauvegarde
  2298.  
  2299.                   // ici on forme le nom du fichier α sauver, et on patche l'URL
  2300.                   if (adr[0]!='\0') {
  2301.                     // savename: simplifier les ../ et autres joyeusetΘs
  2302.                     char BIGSTK save[HTS_URLMAXSIZE*2];
  2303.                     int r_sv=0;
  2304.                     // En cas de moved, adresse premiΦre
  2305.                     char BIGSTK former_adr[HTS_URLMAXSIZE*2];
  2306.                     char BIGSTK former_fil[HTS_URLMAXSIZE*2];
  2307.                     //
  2308.                     save[0]='\0'; former_adr[0]='\0'; former_fil[0]='\0';
  2309.                     //
  2310.  
  2311.                     // nom du chemin α sauver si on doit le calculer
  2312.                     // note: url_savename peut dΘcider de tester le lien si il le trouve
  2313.                     // suspect, et modifier alors adr et fil
  2314.                     // dans ce cas on aura une rΘfΘrence directe au lieu des traditionnels
  2315.                     // moved en cascade (impossible α reproduire α priori en local, lorsque des fichiers
  2316.                     // gif sont impliquΘs par exemple)
  2317.                     if ((p_type!=2) && (p_type!=-2)) {  // pas base href ou codebase
  2318.                       if (forbidden_url!=1) {
  2319.                         char BIGSTK last_adr[HTS_URLMAXSIZE*2];
  2320.                         last_adr[0]='\0';
  2321.                         //char last_fil[HTS_URLMAXSIZE*2]="";
  2322.                         strcpybuff(last_adr,adr);    // ancienne adresse
  2323.                         //strcpybuff(last_fil,fil);    // ancien chemin
  2324.                         r_sv=url_savename(adr,fil,save,former_adr,former_fil,liens[ptr]->adr,liens[ptr]->fil,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe);
  2325.                         if (strcmp(jump_identification(last_adr),jump_identification(adr)) != 0) {  // a changΘ
  2326.  
  2327.                           // 2e test si moved
  2328.  
  2329.                           // Tester si un lien doit Ωtre acceptΘ ou refusΘ (wizard)
  2330.                           // forbidden_url=1 : lien refusΘ
  2331.                           // forbidden_url=0 : lien acceptΘ
  2332.                           if ((ptr>0) && (p_type!=2) && (p_type!=-2)) {    // tester autorisations?
  2333.                             if (!p_nocatch) {
  2334.                               if (adr[0]!='\0') {          
  2335.                                 if ((opt->debug>1) && (opt->log!=NULL)) {
  2336.                                   fspc(opt->log,"debug"); fprintf(opt->log,"wizard moved link retest at %s%s.."LF,adr,fil);
  2337.                                   test_flush;
  2338.                                 }
  2339.                                 forbidden_url=hts_acceptlink(opt,ptr,lien_tot,liens,
  2340.                                   adr,fil,
  2341.                                   NULL, NULL,
  2342.                                   &set_prio_to,
  2343.                                   &just_test_it);
  2344.                                 if ((opt->debug>1) && (opt->log!=NULL)) {
  2345.                                   fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard moved link retest: %d"LF,forbidden_url);
  2346.                                   test_flush;
  2347.                                 }
  2348.                               }
  2349.                             }
  2350.                           }
  2351.  
  2352.                           //import_done=1;    // c'est un import!
  2353.                           meme_adresse=0;   // on a changΘ
  2354.                         }
  2355.                       } else {
  2356.                         strcpybuff(save,"");  // dummy
  2357.                       }
  2358.                     }
  2359.                     if (r_sv!=-1) {  // pas d'erreur, on continue
  2360.                       /* log */
  2361.                       if ((opt->debug>1) && (opt->log!=NULL)) {
  2362.                         fspc(opt->log,"debug");
  2363.                         if (forbidden_url!=1) {    // le lien va Ωtre chargΘ
  2364.                           if ((p_type==2) || (p_type==-2)) {  // base href ou codebase, pas un lien
  2365.                             fprintf(opt->log,"Code/Codebase: %s%s"LF,adr,fil);
  2366.                           } else if ((opt->getmode & 4)==0) {
  2367.                             fprintf(opt->log,"Record: %s%s -> %s"LF,adr,fil,save);
  2368.                           } else {
  2369.                             if (!ishtml(fil))
  2370.                               fprintf(opt->log,"Record after: %s%s -> %s"LF,adr,fil,save);
  2371.                             else
  2372.                               fprintf(opt->log,"Record: %s%s -> %s"LF,adr,fil,save);
  2373.                           } 
  2374.                         } else
  2375.                           fprintf(opt->log,"External: %s%s"LF,adr,fil);
  2376.                         test_flush;
  2377.                       }
  2378.                       /* FIN log */
  2379.  
  2380.                       // Θcrire lien
  2381.                       if ((p_type==2) || (p_type==-2)) {  // base href ou codebase, sauter
  2382.                         lastsaved=eadr-1+1;  // sauter "
  2383.                       }
  2384.                       /* */
  2385.                       else if (opt->urlmode==0) {    // URL absolue dans tous les cas
  2386.                         if ((opt->getmode & 1) && (ptr>0)) {    // ecrire les html
  2387.                           if (!link_has_authority(adr)) {
  2388.                             HT_ADD("http://");
  2389.                           } else {
  2390.                             char* aut = strstr(adr, "//");
  2391.                             if (aut) {
  2392.                               char tmp[256];
  2393.                               tmp[0]='\0';
  2394.                               strncatbuff(tmp, adr, (int) (aut - adr));   // scheme
  2395.                               HT_ADD(tmp);          // Protocol
  2396.                               HT_ADD("//");
  2397.                             }
  2398.                           }
  2399.  
  2400.                           if (!opt->passprivacy) {
  2401.                             HT_ADD_HTMLESCAPED(jump_protocol(adr));           // Password
  2402.                           } else {
  2403.                             HT_ADD_HTMLESCAPED(jump_identification(adr));     // No Password
  2404.                           }
  2405.                           if (*fil!='/')
  2406.                             HT_ADD("/");
  2407.                           HT_ADD_HTMLESCAPED(fil);
  2408.                         }
  2409.                         lastsaved=eadr-1;    // dernier Θcrit+1 (enfin euh apres on fait un ++ alors hein)
  2410.                         /* */
  2411.                       } else if (opt->urlmode >= 4) {    // ne rien faire dans tous les cas!
  2412.                         /* */
  2413.                         /* leave the link 'as is' */
  2414.                         /* Sinon, dΘpend de interne/externe */
  2415.                       } else if (forbidden_url==1) {    // le lien ne sera pas chargΘ, rΘfΘrence externe!
  2416.                         if ((opt->getmode & 1) && (ptr>0)) {
  2417.                           if (p_type!=-1) {     // pas que le nom de fichier (pas classe java)
  2418.                             if (!opt->external) {
  2419.                               if (!link_has_authority(adr)) {
  2420.                                 HT_ADD("http://");
  2421.                                 if (!opt->passprivacy) {
  2422.                                   HT_ADD_HTMLESCAPED(adr);     // Password
  2423.                                 } else {
  2424.                                   HT_ADD_HTMLESCAPED(jump_identification(adr));     // No Password
  2425.                                 }
  2426.                                 if (*fil!='/')
  2427.                                   HT_ADD("/");
  2428.                                 HT_ADD_HTMLESCAPED(fil);
  2429.                               } else {
  2430.                                 char* aut = strstr(adr, "//");
  2431.                                 if (aut) {
  2432.                                   char tmp[256];
  2433.                                   tmp[0]='\0';
  2434.                                   strncatbuff(tmp, adr, (int) (aut - adr));   // scheme
  2435.                                   HT_ADD(tmp);          // Protocol
  2436.                                   HT_ADD("//");
  2437.                                   if (!opt->passprivacy) {
  2438.                                     HT_ADD_HTMLESCAPED(jump_protocol(adr));          // Password
  2439.                                   } else {
  2440.                                     HT_ADD_HTMLESCAPED(jump_identification(adr));     // No Password
  2441.                                   }
  2442.                                   if (*fil!='/')
  2443.                                     HT_ADD("/");
  2444.                                   HT_ADD_HTMLESCAPED(fil);
  2445.                                 }
  2446.                               }
  2447.                               //
  2448.                             } else {    // fichier/page externe, mais on veut gΘnΘrer une erreur
  2449.                               //
  2450.                               int patch_it=0;
  2451.                               int add_url=0;
  2452.                               char* cat_name=NULL;
  2453.                               char* cat_data=NULL;
  2454.                               int cat_nb=0;
  2455.                               int cat_data_len=0;
  2456.  
  2457.                               // ajouter lien external
  2458.                               switch ( (link_has_authority(adr)) ? 1 : ( (fil[strlen(fil)-1]=='/')?1:(ishtml(fil))  ) ) {
  2459.                             case 1: case -2:       // html ou rΘpertoire
  2460.                               if (opt->getmode & 1) {  // sauver html
  2461.                                 patch_it=1;   // redirect
  2462.                                 add_url=1;    // avec link?
  2463.                                 cat_name="external.html";
  2464.                                 cat_nb=0;
  2465.                                 cat_data=HTS_DATA_UNKNOWN_HTML;
  2466.                                 cat_data_len=HTS_DATA_UNKNOWN_HTML_LEN;
  2467.                               }
  2468.                               break;
  2469.                             default:    // inconnu
  2470.                               // asp, cgi..
  2471.                               if ( (strfield2(fil+max(0,(int)strlen(fil)-4),".gif")) 
  2472.                                 || (strfield2(fil+max(0,(int)strlen(fil)-4),".jpg")) 
  2473.                                 || (strfield2(fil+max(0,(int)strlen(fil)-4),".xbm")) 
  2474.                                 /*|| (ishtml(fil)!=0)*/ ) {
  2475.                                 patch_it=1;   // redirect
  2476.                               add_url=1;    // avec link aussi
  2477.                               cat_name="external.gif";
  2478.                               cat_nb=1;
  2479.                               cat_data=HTS_DATA_UNKNOWN_GIF;
  2480.                               cat_data_len=HTS_DATA_UNKNOWN_GIF_LEN;
  2481.                                 } else /* if (is_dyntype(get_ext(fil))) */ {
  2482.                                   patch_it=1;   // redirect
  2483.                                   add_url=1;    // avec link?
  2484.                                   cat_name="external.html";
  2485.                                   cat_nb=0;
  2486.                                   cat_data=HTS_DATA_UNKNOWN_HTML;
  2487.                                   cat_data_len=HTS_DATA_UNKNOWN_HTML_LEN;
  2488.                                 }
  2489.                                 break;
  2490.                               }// html,gif
  2491.  
  2492.                               if (patch_it) {
  2493.                                 char BIGSTK save[HTS_URLMAXSIZE*2];
  2494.                                 char BIGSTK tempo[HTS_URLMAXSIZE*2];
  2495.                                 strcpybuff(save,opt->path_html);
  2496.                                 strcatbuff(save,cat_name);
  2497.                                 if (lienrelatif(tempo,save, relativesavename)==0) {
  2498.                                   if (!no_esc_utf)
  2499.                                     escape_uri(tempo);     // escape with %xx
  2500.                                   else
  2501.                                     escape_uri_utf(tempo);     // escape with %xx
  2502.                                   HT_ADD_HTMLESCAPED(tempo);    // page externe
  2503.                                   if (add_url) {
  2504.                                     HT_ADD("?link=");    // page externe
  2505.  
  2506.                                     // same as above
  2507.                                     if (!link_has_authority(adr)) {
  2508.                                       HT_ADD("http://");
  2509.                                       if (!opt->passprivacy) {
  2510.                                         HT_ADD_HTMLESCAPED(adr);     // Password
  2511.                                       } else {
  2512.                                         HT_ADD_HTMLESCAPED(jump_identification(adr));     // No Password
  2513.                                       }
  2514.                                       if (*fil!='/')
  2515.                                         HT_ADD("/");
  2516.                                       HT_ADD_HTMLESCAPED(fil);
  2517.                                     } else {
  2518.                                       char* aut = strstr(adr, "//");
  2519.                                       if (aut) {
  2520.                                         char tmp[256];
  2521.                                         tmp[0]='\0';
  2522.                                         strncatbuff(tmp, adr, (int) (aut - adr) + 2);   // scheme
  2523.                                         HT_ADD(tmp);
  2524.                                         if (!opt->passprivacy) {
  2525.                                           HT_ADD_HTMLESCAPED(jump_protocol(adr));          // Password
  2526.                                         } else {
  2527.                                           HT_ADD_HTMLESCAPED(jump_identification(adr));     // No Password
  2528.                                         }
  2529.                                         if (*fil!='/')
  2530.                                           HT_ADD("/");
  2531.                                         HT_ADD_HTMLESCAPED(fil);
  2532.                                       }
  2533.                                     }
  2534.                                     //
  2535.  
  2536.                                   }
  2537.                                 }
  2538.  
  2539.                                 // write the file?
  2540.                                 if (verif_external(cat_nb,1)) {
  2541.                                   //if (!fexist(fconcat(opt->path_html,cat_name))) {
  2542.                                   FILE* fp = filecreate(fconcat(opt->path_html,cat_name));
  2543.                                   if (fp) {
  2544.                                     if (cat_data_len==0) {   // texte
  2545.                                       verif_backblue(opt,opt->path_html);
  2546.                                       fprintf(fp,"%s%s","<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"LF,cat_data);
  2547.                                     } else {                    // data
  2548.                                       fwrite(cat_data,cat_data_len,1,fp);
  2549.                                     }
  2550.                                     fclose(fp);
  2551.                                     usercommand(opt,0,NULL,fconcat(opt->path_html,cat_name),"","");
  2552.                                   }
  2553.                                 }
  2554.                               }  else {    // Θcrire normalement le nom de fichier
  2555.                                 HT_ADD("http://");
  2556.                                 if (!opt->passprivacy) {
  2557.                                   HT_ADD_HTMLESCAPED(adr);       // Password
  2558.                                 } else {
  2559.                                   HT_ADD_HTMLESCAPED(jump_identification(adr));       // No Password
  2560.                                 }
  2561.                                 if (*fil!='/')
  2562.                                   HT_ADD("/");
  2563.                                 HT_ADD_HTMLESCAPED(fil);
  2564.                               }// patcher?
  2565.                             }  // external
  2566.                           } else {  // que le nom de fichier (classe java)
  2567.                             // en gros recopie de plus bas: copier codebase et base
  2568.                             if (p_flush) {
  2569.                               char BIGSTK tempo[HTS_URLMAXSIZE*2];    // <-- ajoutΘ
  2570.                               char BIGSTK tempo_pat[HTS_URLMAXSIZE*2];
  2571.  
  2572.                               // Calculer chemin
  2573.                               tempo_pat[0]='\0';
  2574.                               strcpybuff(tempo,fil);  // <-- ajoutΘ
  2575.                               {
  2576.                                 char* a=strrchr(tempo,'/');
  2577.  
  2578.                                 // Example: we converted code="x.y.z.foo.class" into "x/y/z/foo.class"
  2579.                                 // we have to do the contrary now
  2580.                                 if (add_class_dots_to_patch>0) {
  2581.                                   while( (add_class_dots_to_patch>0) && (a) ) {
  2582.                                     *a='.';     // convert "false" java / into .
  2583.                                     add_class_dots_to_patch--;
  2584.                                     a=strrchr(tempo,'/');
  2585.                                   }
  2586.                                   // if add_class_dots_to_patch, this is because there is a problem!!
  2587.                                   if (add_class_dots_to_patch) {
  2588.                                     if (opt->errlog) {
  2589.                                       fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Error: can not rewind java path %s, check html code"LF,tempo);
  2590.                                       test_flush;
  2591.                                     }
  2592.                                   }
  2593.                                 }
  2594.  
  2595.                                 // Cut path/filename
  2596.                                 if (a) {
  2597.                                   char BIGSTK tempo2[HTS_URLMAXSIZE*2];
  2598.                                   strcpybuff(tempo2,a+1);         // FICHIER
  2599.                                   strncatbuff(tempo_pat,tempo,(int) (a - tempo)+1);  // chemin
  2600.                                   strcpybuff(tempo,tempo2);                     // fichier
  2601.                                 }
  2602.                               }
  2603.  
  2604.                               // write codebase="path"
  2605.                               if ((opt->getmode & 1) && (ptr>0)) {
  2606.                                 char BIGSTK tempo4[HTS_URLMAXSIZE*2];
  2607.                                 tempo4[0]='\0';
  2608.  
  2609.                                 if (strnotempty(tempo_pat)) {
  2610.                                   HT_ADD("codebase=\"http://");
  2611.                                   if (!opt->passprivacy) {
  2612.                                     HT_ADD_HTMLESCAPED(adr);  // Password
  2613.                                   } else {
  2614.                                     HT_ADD_HTMLESCAPED(jump_identification(adr));  // No Password
  2615.                                   }
  2616.                                   if (*tempo_pat!='/') HT_ADD("/");
  2617.                                   HT_ADD(tempo_pat);
  2618.                                   HT_ADD("\" ");
  2619.                                 }
  2620.  
  2621.                                 strncatbuff(tempo4,lastsaved,(int) (p_flush - lastsaved));
  2622.                                 HT_ADD(tempo4);    // refresh code="
  2623.                                 HT_ADD(tempo);
  2624.                               }
  2625.                             }
  2626.                           }
  2627.                         }
  2628.                         lastsaved=eadr-1;
  2629.                       }
  2630.                       /*
  2631.                       else if (opt->urlmode==1) {    // ABSOLU, c'est le cas le moins courant
  2632.                       //  NE FONCTIONNE PAS!!  (et est inutile)
  2633.                       if ((opt->getmode & 1) && (ptr>0)) {    // ecrire les html
  2634.                       // Θcrire le lien modifiΘ, absolu
  2635.                       HT_ADD("file:");
  2636.                       if (*save=='/')
  2637.                       HT_ADD(save+1)
  2638.                       else
  2639.                       HT_ADD(save)
  2640.                       }
  2641.                       lastsaved=eadr-1;    // dernier Θcrit+1 (enfin euh apres on fait un ++ alors hein)
  2642.                       }
  2643.                       */
  2644.                       else if (opt->mimehtml) {
  2645.                         char BIGSTK buff[HTS_URLMAXSIZE*3];
  2646.                         HT_ADD("cid:");
  2647.                         strcpybuff(buff, adr);
  2648.                         strcatbuff(buff, fil);
  2649.                         escape_in_url(buff);
  2650.                         { char* a = buff; while((a = strchr(a, '%'))) { *a = 'X'; a++; } }
  2651.                         HT_ADD_HTMLESCAPED(buff);
  2652.                         lastsaved=eadr-1;    // dernier Θcrit+1 (enfin euh apres on fait un ++ alors hein)
  2653.                       }
  2654.                       else if (opt->urlmode==3) {    // URI absolue /
  2655.                         if ((opt->getmode & 1) && (ptr>0)) {    // ecrire les html
  2656.                           HT_ADD_HTMLESCAPED(fil);
  2657.                         }
  2658.                         lastsaved=eadr-1;    // dernier Θcrit+1 (enfin euh apres on fait un ++ alors hein)
  2659.                       }
  2660.                       else if (opt->urlmode==2) {  // RELATIF
  2661.                         char BIGSTK tempo[HTS_URLMAXSIZE*2];
  2662.                         tempo[0]='\0';
  2663.                         // calculer le lien relatif
  2664.  
  2665.                         if (lienrelatif(tempo,save,relativesavename)==0) {
  2666.                           if (!in_media) {    // In media (such as real audio): don't patch
  2667.                             if (!no_esc_utf)
  2668.                               escape_uri(tempo);     // escape with %xx
  2669.                             else {
  2670.                               /* No escaping at all - remaining upper chars will be escaped below */
  2671.                               /* FIXME - Should be done in all local cases */
  2672.                               //x_escape_html(tempo);
  2673.                               //escape_uri_utf(tempo);     // FIXME - escape with %xx
  2674.                               //escape_uri(tempo);     // escape with %xx
  2675.                             }
  2676.                           }
  2677.                           if ((opt->debug>1) && (opt->log!=NULL)) {
  2678.                             fspc(opt->log,"debug"); fprintf(opt->log,"relative link at %s build with %s and %s: %s"LF,adr,save,relativesavename,tempo);
  2679.                             test_flush;
  2680.                           }
  2681.  
  2682.                           // lien applet (code) - il faut placer un codebase avant
  2683.                           if (p_type==-1) {  // que le nom de fichier
  2684.  
  2685.                             if (p_flush) {
  2686.                               char BIGSTK tempo_pat[HTS_URLMAXSIZE*2];
  2687.                               tempo_pat[0]='\0';
  2688.                               {
  2689.                                 char* a=strrchr(tempo,'/');
  2690.  
  2691.                                 // Example: we converted code="x.y.z.foo.class" into "x/y/z/foo.class"
  2692.                                 // we have to do the contrary now
  2693.                                 if (add_class_dots_to_patch>0) {
  2694.                                   while( (add_class_dots_to_patch>0) && (a) ) {
  2695.                                     *a='.';     // convert "false" java / into .
  2696.                                     add_class_dots_to_patch--;
  2697.                                     a=strrchr(tempo,'/');
  2698.                                   }
  2699.                                   // if add_class_dots_to_patch, this is because there is a problem!!
  2700.                                   if (add_class_dots_to_patch) {
  2701.                                     if (opt->errlog) {
  2702.                                       fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Error: can not rewind java path %s, check html code"LF,tempo);
  2703.                                       test_flush;
  2704.                                     }
  2705.                                   }
  2706.                                 }
  2707.  
  2708.                                 if (a) {
  2709.                                   char BIGSTK tempo2[HTS_URLMAXSIZE*2];
  2710.                                   strcpybuff(tempo2,a+1);
  2711.                                   strncatbuff(tempo_pat,tempo,(int) (a - tempo)+1);  // chemin
  2712.                                   strcpybuff(tempo,tempo2);                     // fichier
  2713.                                 }
  2714.                               }
  2715.  
  2716.                               // write codebase="path"
  2717.                               if ((opt->getmode & 1) && (ptr>0)) {
  2718.                                 char BIGSTK tempo4[HTS_URLMAXSIZE*2];
  2719.                                 tempo4[0]='\0';
  2720.  
  2721.                                 if (strnotempty(tempo_pat)) {
  2722.                                   HT_ADD("codebase=\"");
  2723.                                   HT_ADD_HTMLESCAPED(tempo_pat);
  2724.                                   HT_ADD("\" ");
  2725.                                 }
  2726.  
  2727.                                 strncatbuff(tempo4,lastsaved,(int) (p_flush - lastsaved));
  2728.                                 HT_ADD(tempo4);    // refresh code="
  2729.                               }
  2730.                             }
  2731.                             //lastsaved=adr;    // dernier Θcrit+1
  2732.                           }                              
  2733.  
  2734.                           if ((opt->getmode & 1) && (ptr>0)) {
  2735.                             // write the modified, relative link
  2736.                             // Note: escape all chars, even >127 (no UTF)
  2737.                             HT_ADD_HTMLESCAPED_FULL(tempo);
  2738.  
  2739.                             // Add query-string, for informational purpose only
  2740.                             // Useless, because all parameters-pages are saved into different targets
  2741.                             if (opt->includequery) {
  2742.                               char* a=strchr(lien,'?');
  2743.                               if (a) {
  2744.                                 HT_ADD_HTMLESCAPED(a);
  2745.                               }
  2746.                             }
  2747.                           }
  2748.                           lastsaved=eadr-1;    // dernier Θcrit+1 (enfin euh apres on fait un ++ alors hein)
  2749.                         } else {
  2750.                           if (opt->errlog) {
  2751.                             fprintf(opt->errlog,"Error building relative link %s and %s"LF,save,relativesavename);
  2752.                             test_flush;
  2753.                           }
  2754.                         }
  2755.                       }  // sinon le lien sera Θcrit normalement
  2756.  
  2757.  
  2758. #if 0
  2759.                       if (fexist(save)) {    // le fichier existe..
  2760.                         adr[0]='\0';
  2761.                         //if ((opt->debug>0) && (opt->log!=NULL)) {
  2762.                         if (opt->errlog) {
  2763.                           fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link has already been written on disk, cancelled: %s"LF,save);
  2764.                           test_flush;
  2765.                         }
  2766.                       }
  2767. #endif                            
  2768.  
  2769.                       /* Security check */
  2770.                       if (strlen(save) >= HTS_URLMAXSIZE) {
  2771.                         adr[0]='\0';
  2772.                         if (opt->errlog) {
  2773.                           fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link is too long: %s"LF,save);
  2774.                           test_flush;
  2775.                         }
  2776.                       }
  2777.  
  2778.                       if ((adr[0]!='\0') && (p_type!=2) && (p_type!=-2) && (forbidden_url!=1) ) {  // si le fichier n'existe pas, ajouter α la liste                            
  2779.                         // n'y a-t-il pas trop de liens?
  2780.                         if (lien_tot+1 >= lien_max-4) {    // trop de liens!
  2781.                           printf("PANIC! : Too many URLs : >%d [%d]\n",lien_tot,__LINE__);
  2782.                           if (opt->errlog) {
  2783.                             fprintf(opt->errlog,LF"Too many URLs, giving up..(>%d)"LF,lien_max);
  2784.                             fprintf(opt->errlog,"To avoid that: use #L option for more links (example: -#L1000000)"LF);
  2785.                             test_flush;
  2786.                           }
  2787.                           if ((opt->getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
  2788.                           XH_uninit;   // dΘsallocation mΘmoire & buffers
  2789.                           return -1;
  2790.  
  2791.                         } else {    // noter le lien sur la listes des liens α charger
  2792.                           int pass_fix,dejafait=0;
  2793.  
  2794.                           // Compute the priority of this link
  2795.                           if ((opt->getmode & 4)==0) {    // traiter html aprΦs
  2796.                             pass_fix=0;
  2797.                           } else {    // vΘrifier que ce n'est pas un !html
  2798.                             if (!ishtml(fil))
  2799.                               pass_fix=1;        // prioritΘ infΘrieure (traiter aprΦs)
  2800.                             else
  2801.                               pass_fix=max(0,numero_passe);    // prioritΘ normale
  2802.                           }
  2803.  
  2804.                           /* If the file seems to be an html file, get depth-1 */
  2805.                           /*
  2806.                           if (strnotempty(save)) {
  2807.                           if (ishtml(save) == 1) {
  2808.                           // descore_prio = 2;
  2809.                           } else {
  2810.                           // descore_prio = 1;
  2811.                           }
  2812.                           }
  2813.                           */
  2814.  
  2815.                           // check that the link has not already been recorded
  2816.                           // if it has, make sure the priority associated with
  2817.                           // the file is the greater of the two priorities
  2818.                           //
  2819.                           // We start from the end and try to be quick (saves CPU time)
  2820. #if HTS_HASH
  2821.                           {
  2822.                             int i=hash_read(hash,save,"",0,opt->urlhack);      // lecture type 0 (sav)
  2823.                             if (i>=0) {
  2824.                               if ((opt->debug>1) && (opt->log!=NULL)) {
  2825.                                 if (
  2826.                                   strcmp(adr, liens[i]->adr) != 0 
  2827.                                   || strcmp(fil, liens[i]->fil) != 0
  2828.                                   ) {
  2829.                                     fspc(opt->log,"debug"); fprintf(opt->log,"merging similar links %s%s and %s%s"LF,adr,fil,liens[i]->adr,liens[i]->fil);
  2830.                                     test_flush;
  2831.                                   }
  2832.                               }
  2833.                               liens[i]->depth=maximum(liens[i]->depth,liens[ptr]->depth - 1);
  2834.                               dejafait=1;
  2835.                             }
  2836.                           }
  2837. #else
  2838.                           {
  2839.                             int l;
  2840.                             int i;
  2841.                             l=strlen(save);  // opti
  2842.                             for(i=lien_tot-1;(i>=0) && (dejafait==0);i--) {
  2843.                               if (liens[i]->sav_len==l) {    // mΩme taille de chaεne
  2844.                                 if (strcmp(liens[i]->sav,save)==0) {    // existe dΘja
  2845.                                   liens[i]->depth=maximum(liens[i]->depth,liens[ptr]->depth - 1);
  2846.                                   dejafait=1;
  2847.                                 }
  2848.                               }
  2849.                             }
  2850.                           }
  2851. #endif
  2852.  
  2853.                           // the link has never been created.
  2854.                           // this time, we create it!
  2855.                           if (!dejafait) {                                
  2856.                             //
  2857.                             // >>>> CREER LE LIEN <<<<
  2858.                             //
  2859.                             // record the link to be loaded
  2860.                             //liens[lien_tot]->adr[0]=liens[lien_tot]->fil[0]=liens[lien_tot]->sav[0]='\0';
  2861.                             // same address: the parent object is the parent object of the current one
  2862.  
  2863.                             // DEBUT ROBOTS.TXT AJOUT
  2864.                             if (!just_test_it) {
  2865.                               if (
  2866.                                 (!strfield(adr,"ftp://"))         // non ftp
  2867.                                 && (!strfield(adr,"file://")) ) {    // non file
  2868.                                   if (opt->robots) {    // rΘcupΘrer robots
  2869.                                     if (ishtml(fil)!=0) {                       // pas la peine pour des fichiers isolΘs
  2870.                                       if (checkrobots(_ROBOTS,adr,"") != -1) {    // robots.txt ?
  2871.                                         checkrobots_set(_ROBOTS ,adr,"");          // ajouter entrΘe vide
  2872.                                         if (checkrobots(_ROBOTS,adr,"") == -1) {    // robots.txt ?
  2873.                                           // enregistrer robots.txt (MACRO)
  2874.                                           liens_record(adr,"/robots.txt","","","");
  2875.                                           if (liens[lien_tot]==NULL) {  // erreur, pas de place rΘservΘe
  2876.                                             printf("PANIC! : Not enough memory [%d]\n",__LINE__);
  2877.                                             if (opt->errlog) { 
  2878.                                               fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
  2879.                                               test_flush;
  2880.                                             }
  2881.                                             if ((opt->getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
  2882.                                             XH_uninit;    // dΘsallocation mΘmoire & buffers
  2883.                                             return -1;
  2884.                                           }  
  2885.                                           liens[lien_tot]->testmode=0;          // pas mode test
  2886.                                           liens[lien_tot]->link_import=0;       // pas mode import     
  2887.                                           liens[lien_tot]->premier=lien_tot;
  2888.                                           liens[lien_tot]->precedent=ptr;
  2889.                                           liens[lien_tot]->depth=0;
  2890.                                           liens[lien_tot]->pass2=max(0,numero_passe);
  2891.                                           liens[lien_tot]->retry=0;
  2892.                                           lien_tot++;  // UN LIEN DE PLUS
  2893. #if DEBUG_ROBOTS
  2894.                                           printf("robots.txt: added file robots.txt for %s\n",adr);
  2895. #endif
  2896.                                           if ((opt->debug>1) && (opt->log!=NULL)) {
  2897.                                             fspc(opt->log,"debug"); fprintf(opt->log,"robots.txt added at %s"LF,adr);
  2898.                                             test_flush;
  2899.                                           }
  2900.                                         } else {
  2901.                                           if (opt->errlog) {   
  2902.                                             fprintf(opt->errlog,"Unexpected robots.txt error at %d"LF,__LINE__);
  2903.                                             test_flush;
  2904.                                           }
  2905.                                         }
  2906.                                       }
  2907.                                     }
  2908.                                   }
  2909.                                 }
  2910.                             }
  2911.                             // FIN ROBOTS.TXT AJOUT
  2912.  
  2913.                             // enregistrer (MACRO)
  2914.                             liens_record(adr,fil,save,former_adr,former_fil);
  2915.                             if (liens[lien_tot]==NULL) {  // erreur, pas de place rΘservΘe
  2916.                               printf("PANIC! : Not enough memory [%d]\n",__LINE__);
  2917.                               if (opt->errlog) { 
  2918.                                 fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
  2919.                                 test_flush;
  2920.                               }
  2921.                               if ((opt->getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
  2922.                               XH_uninit;    // dΘsallocation mΘmoire & buffers
  2923.                               return -1;
  2924.                             }  
  2925.  
  2926.                             // mode test?
  2927.                             if (!just_test_it)
  2928.                               liens[lien_tot]->testmode=0;          // pas mode test
  2929.                             else
  2930.                               liens[lien_tot]->testmode=1;          // mode test
  2931.                             if (!import_done)
  2932.                               liens[lien_tot]->link_import=0;       // pas mode import
  2933.                             else
  2934.                               liens[lien_tot]->link_import=1;       // mode import
  2935.                             // write the other fields of the link structure
  2936.                             if ((meme_adresse) && (!import_done) && (liens[ptr]->premier != 0))
  2937.                               liens[lien_tot]->premier=liens[ptr]->premier;
  2938.                             else    // sinon l'objet pΦre est le prΘcΘdent lui mΩme
  2939.                               liens[lien_tot]->premier=lien_tot;
  2940.                             // liens[lien_tot]->premier=ptr;
  2941.  
  2942.                             liens[lien_tot]->precedent=ptr;
  2943.                             // record the priority
  2944.                             if (!set_prio_to)
  2945.                               liens[lien_tot]->depth=liens[ptr]->depth - 1;
  2946.                             else
  2947.                               liens[lien_tot]->depth=max(0,min(liens[ptr]->depth-1,set_prio_to-1));         // PRIORITE NULLE (catch page)
  2948.                             // noter pass
  2949.                             liens[lien_tot]->pass2=pass_fix;
  2950.                             liens[lien_tot]->retry=opt->retry;
  2951.  
  2952.                             //strcpybuff(liens[lien_tot]->adr,adr);
  2953.                             //strcpybuff(liens[lien_tot]->fil,fil);
  2954.                             //strcpybuff(liens[lien_tot]->sav,save); 
  2955.                             if ((opt->debug>1) && (opt->log!=NULL)) {
  2956.                               if (!just_test_it) {
  2957.                                 fspc(opt->log,"debug"); fprintf(opt->log,"OK, NOTE: %s%s -> %s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil,liens[lien_tot]->sav);
  2958.                               } else {
  2959.                                 fspc(opt->log,"debug"); fprintf(opt->log,"OK, TEST: %s%s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil);
  2960.                               }
  2961.                               test_flush;
  2962.                             }
  2963.  
  2964.                             lien_tot++;  // UN LIEN DE PLUS
  2965.                           } else { // if !dejafait
  2966.                             if ((opt->debug>1) && (opt->log!=NULL)) {
  2967.                               fspc(opt->log,"debug"); fprintf(opt->log,"link has already been recorded, cancelled: %s"LF,save);
  2968.                               test_flush;
  2969.                             }
  2970.  
  2971.                           }
  2972.  
  2973.  
  2974.                         }   // si pas trop de liens
  2975.                       }   // si adr[0]!='\0'
  2976.  
  2977.  
  2978.                     }  // if adr[0]!='\0' 
  2979.  
  2980.                   }  // if adr[0]!='\0'
  2981.  
  2982.                 }    // if strlen(lien)>0
  2983.  
  2984.               }   // if ok==0      
  2985.  
  2986.               assertf(eadr - adr >= 0);       // Should not go back
  2987.               if (eadr > adr) {
  2988.                 INCREMENT_CURRENT_ADR(eadr - 1 - adr);
  2989.               }
  2990.               // adr=eadr-1;  // ** sauter
  2991.  
  2992.               /* We skipped bytes and skip the " : reset state */
  2993.               /*if (inscript) {
  2994.               inscript_state_pos = INSCRIPT_START;
  2995.               }*/
  2996.  
  2997.           }  // if (p) 
  2998.  
  2999.         }  // si '<' ou '>'
  3000.  
  3001.         // plus loin
  3002.         adr++;      // automate will be checked next loop
  3003.  
  3004.  
  3005.         /* Optimization: if we are scanning in HTML data (not in tag or script), 
  3006.         then jump to the next starting tag */
  3007.         if (ptr>0) {
  3008.           if ( (!intag)         /* Not in tag */
  3009.             && (!inscript)      /* Not in (java)script */
  3010.             && (!in_media)      /* Not in media */
  3011.             && (!incomment)     /* Not in comment (<!--) */
  3012.             && (!inscript_tag)  /* Not in tag with script inside */
  3013.             ) 
  3014.           {
  3015.             /* Not at the end */
  3016.             if (( ((int) (adr - r->adr)) ) < r->size) {
  3017.               /* Not on a starting tag yet */
  3018.               if (*adr != '<') {
  3019.                 /* strchr does not well behave with null chrs.. */
  3020.                 /* char* adr_next = strchr(adr,'<'); */
  3021.                 char* adr_next = adr;
  3022.                 while(*adr_next != '<' && (adr_next - r->adr) < r->size ) {
  3023.                   adr_next++;
  3024.                 }
  3025.                 /* Jump to near end (index hack) */
  3026.                 if (!adr_next || *adr_next != '<') {
  3027.                   if (
  3028.                     ( (int)(adr - r->adr) < (r->size - 4)) 
  3029.                     &&
  3030.                     (r->size > 4)
  3031.                     ) {
  3032.                       adr = r->adr + r->size - 2;
  3033.                     }
  3034.                 } else {
  3035.                   adr = adr_next;
  3036.                 }
  3037.               }
  3038.             }
  3039.           }
  3040.         }
  3041.  
  3042.         // ----------
  3043.         // Θcrire peu α peu
  3044.         if ((opt->getmode & 1) && (ptr>0)) HT_ADD_ADR;
  3045.         lastsaved=adr;    // dernier Θcrit+1
  3046.         // ----------
  3047.  
  3048.         // Checks
  3049.         if (back_add_stats != opt->state.back_add_stats) {
  3050.           back_add_stats = opt->state.back_add_stats;
  3051.  
  3052.           // Check max time
  3053.           if (!back_checkmirror(opt)) {
  3054.             adr = r->adr + r->size;
  3055.           }
  3056.         }
  3057.  
  3058.         // pour les stats du shell si parsing trop long
  3059. #if HTS_ANALYSTE
  3060.         if (r->size)
  3061.           _hts_in_html_done=(100 * ((int) (adr - r->adr)) ) / (int)(r->size);
  3062.         if (_hts_in_html_poll) {
  3063.           _hts_in_html_poll=0;
  3064.           // temps α attendre, et remplir autant que l'on peut le cache (backing)
  3065.           back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);        
  3066.           back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
  3067.  
  3068.           // Transfer rate
  3069.           engine_stats();
  3070.  
  3071.           // Refresh various stats
  3072.           HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
  3073.           HTS_STAT.stat_errors=fspc(NULL,"error");
  3074.           HTS_STAT.stat_warnings=fspc(NULL,"warning");
  3075.           HTS_STAT.stat_infos=fspc(NULL,"info");
  3076.           HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
  3077.           HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
  3078.  
  3079.           if (!hts_htmlcheck_loop(back,back_max,0,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
  3080.             if (opt->errlog) {
  3081.               fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF);
  3082.               test_flush;
  3083.             } 
  3084.             *stre->exit_xh_=1;  // exit requested
  3085.             XH_uninit;
  3086.             return -1;
  3087.             //adr = r->adr + r->size;  // exit
  3088.           } else if (_hts_cancel==1) {
  3089.             // adr = r->adr + r->size;  // exit
  3090.             nofollow=1;               // moins violent
  3091.             _hts_cancel=0;
  3092.           }
  3093.         }
  3094.  
  3095.         // refresh the backing system each 2 seconds
  3096.         if (engine_stats()) {
  3097.           back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);        
  3098.           back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
  3099.         }
  3100. #endif
  3101.       } while(( ((int) (adr - r->adr)) ) < r->size);
  3102. #if HTS_ANALYSTE
  3103.       _hts_in_html_parsing=0;  // flag
  3104.       _hts_cancel=0;           // pas de cancel
  3105. #endif
  3106.       if ((opt->getmode & 1) && (ptr>0)) {
  3107.         {
  3108.           char* cAddr = ht_buff;
  3109.           int cSize = ht_len;
  3110.           if ( (opt->debug>0) && (opt->log!=NULL) ) {
  3111.             fspc(opt->log,"info"); fprintf(opt->log,"engine: postprocess-html: %s%s"LF, urladr, urlfil);
  3112.           }
  3113.           if (hts_htmlcheck_postprocess(&cAddr, &cSize, urladr, urlfil) == 1) {
  3114.             ht_buff = cAddr;
  3115.             ht_len = cSize;
  3116.           }
  3117.         }
  3118.  
  3119.         /* Flush and save to disk */
  3120.         HT_ADD_END;    // achever
  3121.       }
  3122.       //
  3123.       //
  3124.       //
  3125.     }  // if !error
  3126.  
  3127.  
  3128.     if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
  3129.     // sauver fichier
  3130.     //structcheck(savename);
  3131.     //filesave(opt,r->adr,r->size,savename);
  3132.  
  3133. #if HTS_ANALYSTE
  3134.   }  // analyse OK
  3135. #endif
  3136.  
  3137.   /* Apply changes */
  3138.   ENGINE_SAVE_CONTEXT();
  3139.  
  3140.   return 0;
  3141. }
  3142.  
  3143.  
  3144.  
  3145.  
  3146. /*
  3147. Post-process the status code of the current link (liens[ptr]): 301/302/303/307 redirects are either re-queued at the new location or replaced by a generated "Page has moved" HTML stub; 412/416 delete the local file and re-queue the same link; any other non-200 code is retried, banned or reported. Every return visible in this function returns 0, including the out-of-memory exits.
  3148. */
  3149. int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
  3150.   /* Load engine variables (opt, r, liens, ptr, lien_tot, error, urladr, urlfil, ... are not declared in this function; presumably this macro provides them - TODO confirm) */
  3151.   ENGINE_LOAD_CONTEXT();  
  3152.  
  3153.   // BEGIN handling of 301,302,307..
  3154.   // ------------------------------------------------------------
  3155.   if (!error) {
  3156.     ////////{
  3157.     // one more file has been loaded
  3158.     // if (!error) stat_loaded+=r.size;
  3159.  
  3160.     // ------------------------------------------------------------
  3161.     // Handling of 301,302,307 (moved) and 412,416 - 304 responses are handled in the backing code
  3162.     // ------------------------------------------------------------
  3163.     if ( (r->statuscode==301) 
  3164.       || (r->statuscode==302)
  3165.       || (r->statuscode==303)
  3166.       || (r->statuscode==307)
  3167.       ) {          
  3168.         //if (r->adr!=NULL) {   // adr==null for a direct file. [catch: davename normally if cgi]
  3169.         //int i=0;
  3170.         char *rn=NULL;          // buffer for the generated "Page has moved" stub (allocated only when get_it==0)
  3171.         // char* p;
  3172.  
  3173.         if ( (opt->debug>0) && (opt->errlog!=NULL) ) {
  3174.           //if (opt->errlog) {
  3175.           fspc(opt->errlog,"warning"); fprintf(opt->errlog,"%s for %s%s"LF,r->msg,urladr,urlfil);
  3176.           test_flush;
  3177.         }
  3178.  
  3179.  
  3180.         {
  3181.           char BIGSTK mov_url[HTS_URLMAXSIZE*2],mov_adr[HTS_URLMAXSIZE*2],mov_fil[HTS_URLMAXSIZE*2];
  3182.           int get_it=0;         // 0: build a "moved" stub page (default); 1: cancel this link and re-record it at the new location; -1: do nothing
  3183.           int reponse=0;
  3184.           mov_url[0]='\0'; mov_adr[0]='\0'; mov_fil[0]='\0';
  3185.           // the redirect target is taken from r->location
  3186.  
  3187.           strcpybuff(mov_url,r->location);
  3188.  
  3189.           // arbitrary URL -> address + file
  3190.           if ((reponse=ident_url_relatif(mov_url,urladr,urlfil,mov_adr,mov_fil))>=0) {                        
  3191.             int set_prio_to=0;    // no priority forced by the wizard
  3192.  
  3193.             // check whether URLHack is harmless or not
  3194.             if (opt->urlhack) {
  3195.               char BIGSTK n_adr[HTS_URLMAXSIZE*2], n_fil[HTS_URLMAXSIZE*2];
  3196.               char BIGSTK pn_adr[HTS_URLMAXSIZE*2], pn_fil[HTS_URLMAXSIZE*2];
  3197.               n_adr[0] = n_fil[0] = '\0';
  3198.               (void) adr_normalized(mov_adr, n_adr);
  3199.               (void) fil_normalized(mov_fil, n_fil);
  3200.               (void) adr_normalized(urladr, pn_adr);
  3201.               (void) fil_normalized(urlfil, pn_fil);
  3202.               if (strcasecmp(n_adr, pn_adr) == 0 && strcasecmp(n_fil, pn_fil) == 0) {   // only a warning is logged here; processing continues below
  3203.                 if (opt->errlog) {
  3204.                   fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Redirected link is identical because of 'URL Hack' option: %s%s and %s%s"LF, urladr, urlfil, mov_adr, mov_fil);
  3205.                   test_flush;
  3206.                 }
  3207.               }
  3208.             }
  3209.  
  3210.             //if (ident_url_absolute(mov_url,mov_adr,mov_fil)!=-1) {    // ok, URL recognized
  3211.             // this is (roughly) the same URL..
  3212.             // if the only difference is the case of the host name, the server is buggy
  3213.             // ("RFC says.." : host name IS case insensitive)
  3214.             if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) {  // identical except for case
  3215.               // we are going round in circles
  3216.               if (strcmp(mov_fil,urlfil)==0) {
  3217.                 error=1;
  3218.                 get_it=-1;        // do nothing
  3219.                 if (opt->errlog) {
  3220.                   fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Can not bear crazy server (%s) for %s%s"LF,r->msg,urladr,urlfil);
  3221.                   test_flush;
  3222.                 }
  3223.               } else {    // wrong case: cancel the entry in the stack and replay it once
  3224.                 get_it=1;
  3225.               }
  3226.             } else {        // different address
  3227.               if (ishtml(mov_url)==0) {   // not the same address BUT it is a non-html file (no "moved" page possible)
  3228.                 // -> fetch it at this address; the link will be recorded with lien_record() (hash)
  3229.                 if ((opt->debug>1) && (opt->log!=NULL)) {
  3230.                   fspc(opt->log,"debug"); fprintf(opt->log,"wizard link test for moved file at %s%s.."LF,mov_adr,mov_fil);
  3231.                   test_flush;
  3232.                 }
  3233.                 // accepted?
  3234.                 if (hts_acceptlink(opt,ptr,lien_tot,liens,
  3235.                   mov_adr,mov_fil,
  3236.                   NULL, NULL,
  3237.                   &set_prio_to,
  3238.                   NULL) != 1) {                /* new address not refused? (a return value of 1 is treated as a refusal here) */
  3239.                     get_it=1;
  3240.                     if ((opt->debug>1) && (opt->log!=NULL)) {
  3241.                       fspc(opt->log,"debug"); fprintf(opt->log,"moved link accepted: %s%s"LF,mov_adr,mov_fil);
  3242.                       test_flush;
  3243.                     }
  3244.                   }
  3245.               } /* otherwise handled normally */
  3246.             }
  3247.  
  3248.             //if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) {  // identical except for case
  3249.             if (get_it==1) {
  3250.               // short-circuit the rest of the processing:
  3251.               // cancel the current link and record a new one at the redirected location
  3252.               if (opt->errlog) {
  3253.                 fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Warning moved treated for %s%s (real one is %s%s)"LF,urladr,urlfil,mov_adr,mov_fil);
  3254.                 test_flush;
  3255.               }          
  3256.               // cancel the current link
  3257.               error=1;
  3258.               strcpybuff(liens[ptr]->adr,"!");  // dummy character (invalidates hash)
  3259. #if HTS_HASH
  3260. #else
  3261.               liens[ptr]->sav_len=-1;       // invalid size
  3262. #endif
  3263.               // record the NEW link
  3264.               //xxc xxc
  3265.               //  set_prio_to=0+1;  // protection if the moved URL is an html page!!
  3266.               //xxc xxc
  3267.               {
  3268.                 char BIGSTK mov_sav[HTS_URLMAXSIZE*2];
  3269.                 // compute the save name and possibly modify address/file
  3270.                 if (url_savename(mov_adr,mov_fil,mov_sav,NULL,NULL,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe)!=-1) { 
  3271.                   if (hash_read(hash,mov_sav,"",0,0)<0) {      // does not exist yet
  3272.                     // record the link (MACRO) with the SAME sav as the cancelled link (mov_sav is only used for the hash lookup above)
  3273.                     liens_record(mov_adr,mov_fil,liens[ptr]->sav,"","");
  3274.                     //liens_record(mov_adr,mov_fil,mov_sav,"","");
  3275.                     if (liens[lien_tot]!=NULL) {    // OK, no error
  3276.                       // test mode?
  3277.                       liens[lien_tot]->testmode=liens[ptr]->testmode;
  3278.                       liens[lien_tot]->link_import=0;       // normal mode
  3279.                       if (!set_prio_to)
  3280.                         liens[lien_tot]->depth=liens[ptr]->depth;
  3281.                       else
  3282.                         liens[lien_tot]->depth=max(0,min(set_prio_to-1,liens[ptr]->depth));       // ZERO PRIORITY (catch page)
  3283.                       liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe);
  3284.                       liens[lien_tot]->retry=liens[ptr]->retry;
  3285.                       liens[lien_tot]->premier=liens[ptr]->premier;
  3286.                       liens[lien_tot]->precedent=liens[ptr]->precedent;
  3287.                       lien_tot++;
  3288.                     } else {  // oops, error: out of memory!!
  3289.                       printf("PANIC! : Not enough memory [%d]\n",__LINE__);
  3290.                       if (opt->errlog) {
  3291.                         fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
  3292.                         test_flush;
  3293.                       }
  3294.                       //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
  3295.                       XH_uninit;    // free memory & buffers
  3296.                       return 0;     // note: ENGINE_SAVE_CONTEXT() at the end of the function is not reached on this path
  3297.                     }
  3298.                   } else {
  3299.                     if ( (opt->debug>0) && (opt->errlog!=NULL) ) {
  3300.                       fspc(opt->errlog,"warning"); fprintf(opt->errlog,"moving %s to an existing file %s"LF,liens[ptr]->fil,urlfil);
  3301.                       test_flush;
  3302.                     }
  3303.                   }
  3304.  
  3305.                 }
  3306.               }
  3307.  
  3308.               //printf("-> %s %s %s\n",liens[lien_tot-1]->adr,liens[lien_tot-1]->fil,liens[lien_tot-1]->sav);
  3309.  
  3310.               // metaphysical note: there may be both an index.html and an INDEX.HTML
  3311.               // under DOS this does not work very well... but, as I am brilliant, url_savename()
  3312.               // is able to sort this problem out
  3313.             }
  3314.           } // ident_url_xx
  3315.  
  3316.           if (get_it==0) {    // really different address and potentially html (the page cannot be moved as-is because of <img src..> and the like)
  3317.             rn=(char*) calloct(8192,1);
  3318.             if (rn!=NULL) {
  3319.               if (opt->errlog) {
  3320.                 fspc(opt->errlog,"warning"); fprintf(opt->errlog,"File has moved from %s%s to %s"LF,urladr,urlfil,mov_url);
  3321.                 test_flush;
  3322.               }
  3323.               if (!opt->mimehtml) {
  3324.                 escape_uri(mov_url);
  3325.               } else {
  3326.                 char BIGSTK buff[HTS_URLMAXSIZE*3];
  3327.                 strcpybuff(buff, mov_adr);
  3328.                 strcatbuff(buff, mov_fil);
  3329.                 escape_in_url(buff);
  3330.                 { char* a = buff; while((a = strchr(a, '%'))) { *a = 'X'; a++; } }   // replace every '%' in the escaped string by 'X'
  3331.                 strcpybuff(mov_url, "cid:");
  3332.                 strcatbuff(mov_url, buff);
  3333.               }
  3334.               // Build a page that immediately jumps to the right URL
  3335.               // The scanner will then rewrite this URL again in order to mirror it!
  3336.               strcpybuff(rn,"<HTML>"CRLF);
  3337.               strcatbuff(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF);
  3338.               strcatbuff(rn,"<HEAD>"CRLF"<TITLE>Page has moved</TITLE>"CRLF"</HEAD>"CRLF"<BODY>"CRLF);
  3339.               strcatbuff(rn,"<META HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=");
  3340.               strcatbuff(rn,mov_url);    // URL
  3341.               strcatbuff(rn,"\">"CRLF);
  3342.               strcatbuff(rn,"<A HREF=\"");
  3343.               strcatbuff(rn,mov_url);
  3344.               strcatbuff(rn,"\">");
  3345.               strcatbuff(rn,"<B>Click here...</B></A>"CRLF);
  3346.               strcatbuff(rn,"</BODY>"CRLF);
  3347.               strcatbuff(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF);
  3348.               strcatbuff(rn,"</HTML>"CRLF);
  3349.  
  3350.               // replace the page: the response body becomes the generated stub
  3351.               if (r->adr) { 
  3352.                 freet(r->adr); 
  3353.                 r->adr=NULL; 
  3354.               }
  3355.               r->adr=rn;
  3356.               r->size=strlen(r->adr);
  3357.               strcpybuff(r->contenttype, "text/html");
  3358.             }
  3359.           }  // get_it==0
  3360.  
  3361.         }     // block
  3362.         // HTTP error (e.g. 404, not found)
  3363.       } else if (
  3364.         (r->statuscode==412)
  3365.         || (r->statuscode==416)
  3366.         ) {    // Precondition Failed, which for us means re-requesting the WHOLE file
  3367.           if (fexist(liens[ptr]->sav)) {
  3368.             remove(liens[ptr]->sav);    // Delete
  3369.             if (!fexist(liens[ptr]->sav)) {  // Really deleted? (otherwise we would loop..)
  3370. #if HDEBUG
  3371.               printf("Partial content NOT up-to-date, reget all file for %s\n",liens[ptr]->sav);
  3372. #endif
  3373.               if ( (opt->debug>1) && (opt->errlog!=NULL) ) {
  3374.                 //if (opt->errlog) {
  3375.                 fspc(opt->errlog,"debug"); fprintf(opt->errlog,"Partial file reget (%s) for %s%s"LF,r->msg,urladr,urlfil);
  3376.                 test_flush;
  3377.               }
  3378.               // record the SAME link (MACRO)
  3379.               liens_record(liens[ptr]->adr,liens[ptr]->fil,liens[ptr]->sav,"","");
  3380.               if (liens[lien_tot]!=NULL) {    // OK, no error
  3381.                 liens[lien_tot]->testmode=liens[ptr]->testmode;          // test mode?
  3382.                 liens[lien_tot]->link_import=0;       // not import mode
  3383.                 liens[lien_tot]->depth=liens[ptr]->depth;
  3384.                 liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe);
  3385.                 liens[lien_tot]->retry=liens[ptr]->retry;
  3386.                 liens[lien_tot]->premier=liens[ptr]->premier;
  3387.                 liens[lien_tot]->precedent=ptr;       // NOTE(review): the other re-record paths in this function use liens[ptr]->precedent - confirm this difference is intended
  3388.                 lien_tot++;
  3389.                 //
  3390.                 // cancel the current link
  3391.                 error=1;
  3392.                 strcpybuff(liens[ptr]->adr,"!");  // dummy character (invalidates hash)
  3393. #if HTS_HASH
  3394. #else
  3395.                 liens[ptr]->sav_len=-1;       // invalid size
  3396. #endif
  3397.                 //
  3398.               } else {  // oops, error: out of memory!!
  3399.                 printf("PANIC! : Not enough memory [%d]\n",__LINE__);
  3400.                 if (opt->errlog) {
  3401.                   fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
  3402.                   test_flush;
  3403.                 }
  3404.                 //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
  3405.                 XH_uninit;    // free memory & buffers
  3406.                 return 0;     // note: ENGINE_SAVE_CONTEXT() at the end of the function is not reached on this path
  3407.               } 
  3408.             } else {
  3409.               if (opt->errlog!=NULL) {
  3410.                 fspc(opt->errlog,"error"); fprintf(opt->errlog,"Can not remove old file %s"LF,urlfil);
  3411.                 test_flush;
  3412.               }
  3413.             }
  3414.           } else {
  3415.             if (opt->errlog!=NULL) {
  3416.               fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Unexpected 412/416 error (%s) for %s%s"LF,r->msg,urladr,urlfil);
  3417.               test_flush;
  3418.             }
  3419.           }
  3420.         } else if (r->statuscode!=200) {
  3421.           int can_retry=0;    // set to 1 below for status codes that may be retried
  3422.  
  3423.           // cases where we can retry
  3424.           // -2=timeout -3=rateout (internal to httrack)
  3425.           switch(r->statuscode) {
  3426.             //case -1: can_retry=1; break;
  3427.           case -2: if (opt->hostcontrol) {    // timeout and retries exhausted
  3428.             if ((opt->hostcontrol & 1) && (liens[ptr]->retry<=0)) {
  3429.               if ((opt->debug>1) && (opt->log!=NULL)) {
  3430.                 fspc(opt->log,"debug"); fprintf(opt->log,"Link banned: %s%s"LF,urladr,urlfil); test_flush;
  3431.               }
  3432.               host_ban(opt,liens,ptr,lien_tot,back,back_max,jump_identification(urladr));
  3433.               if ((opt->debug>1) && (opt->log!=NULL)) {
  3434.                 fspc(opt->log,"debug"); fprintf(opt->log,"Info: previous log - link banned: %s%s"LF,urladr,urlfil); test_flush;
  3435.               }
  3436.             } else can_retry=1;
  3437.                    } else can_retry=1;
  3438.             break;
  3439.           case -3: if ((opt->hostcontrol) && (liens[ptr]->retry<=0)) {    // too slow
  3440.             if (opt->hostcontrol & 2) {
  3441.               if ((opt->debug>1) && (opt->log!=NULL)) {
  3442.                 fspc(opt->log,"debug"); fprintf(opt->log,"Link banned: %s%s"LF,urladr,urlfil); test_flush;
  3443.               }
  3444.               host_ban(opt,liens,ptr,lien_tot,back,back_max,jump_identification(urladr));
  3445.               if ((opt->debug>1) && (opt->log!=NULL)) {
  3446.                 fspc(opt->log,"debug"); fprintf(opt->log,"Info: previous log - link banned: %s%s"LF,urladr,urlfil); test_flush;
  3447.               }
  3448.             } else can_retry=1;
  3449.                    } else can_retry=1;
  3450.             break;
  3451.           case -4:            // connect closed
  3452.             can_retry=1;
  3453.             break;
  3454.           case -5:            // other (non fatal) error
  3455.             can_retry=1;
  3456.             break;
  3457.           case -6:            // bad SSL handshake
  3458.             can_retry=1;
  3459.             break;
  3460.           case 408: case 409: case 500: case 502: case 504: can_retry=1;
  3461.             break;
  3462.           }
  3463.  
  3464.           if ( strcmp(liens[ptr]->fil,"/primary") != 0 ) {  // no primary (internal page 0)
  3465.             if ((liens[ptr]->retry<=0) || (!can_retry) ) {  // retries exhausted (or retry impossible)
  3466.               if (opt->errlog) {
  3467.                 if ((opt->retry>0) && (can_retry)){
  3468.                   fspc(opt->errlog,"error"); 
  3469.                   fprintf(opt->errlog,"\"%s\" (%d) after %d retries at link %s%s (from %s%s)"LF,r->msg,r->statuscode,opt->retry,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
  3470.                 } else {
  3471.                   if (r->statuscode==-10) {    // test OK
  3472.                     if ((opt->debug>0) && (opt->errlog!=NULL)) {
  3473.                       fspc(opt->errlog,"info"); 
  3474.                       fprintf(opt->errlog,"Test OK at link %s%s (from %s%s)"LF,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
  3475.                     }
  3476.                   } else {
  3477.                     if (strcmp(urlfil,"/robots.txt")) {       // do not display info about robots.txt by default
  3478.                       fspc(opt->errlog,"error"); 
  3479.                       fprintf(opt->errlog,"\"%s\" (%d) at link %s%s (from %s%s)"LF,r->msg,r->statuscode,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
  3480.                     } else {
  3481.                       if (opt->debug>1) {
  3482.                         fspc(opt->errlog,"info"); fprintf(opt->errlog,"No robots.txt rules at %s"LF,urladr);
  3483.                         test_flush;
  3484.                       }
  3485.                     }
  3486.                   }
  3487.                 }
  3488.                 test_flush;
  3489.               }
  3490.  
  3491.               // NO error in top level
  3492.               // due to the "no connection -> previous restored" hack
  3493.               // This prevent the engine from wiping all data if the website has been deleted (or moved)
  3494.               // since last time (which is quite annoying)
  3495.               if (liens[ptr]->precedent != 0) {
  3496.                 // here we test whether the page should be saved anyway
  3497.                 if (opt->errpage) {
  3498.                   store_errpage=1;
  3499.                 }
  3500.               } else {
  3501.                 if (strcmp(urlfil,"/robots.txt") != 0) {
  3502.                   /*
  3503.                   This is an error caused by a link entered by the user
  3504.                   That is, link(s) entered by user are invalid (404, 500, connect error, proxy error->.)
  3505.                   If all links entered are invalid, the session failed and we will attempt to restore
  3506.                   the previous one
  3507.                   Example: Try to update a website which has been deleted remotely: this may delete
  3508.                   the website locally, which is really not desired (especially if the website disappeared!)
  3509.                   With this hack, the engine won't wipe local files (how clever)
  3510.                   */
  3511.                   HTS_STAT.stat_errors_front++;
  3512.                 }
  3513.               }
  3514.  
  3515.             } else {    // retry!!
  3516.               if (opt->debug>0 && opt->errlog != NULL) {  // an alert will be raised if the retry fails
  3517.                 fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Retry after error %d (%s) at link %s%s (from %s%s)"LF,r->statuscode,r->msg,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
  3518.                 test_flush;
  3519.               }
  3520.               // request the file again
  3521.               liens_record(urladr,urlfil,savename,"","");
  3522.               if (liens[lien_tot]!=NULL) {    // OK, no error
  3523.                 liens[lien_tot]->testmode=liens[ptr]->testmode;          // test mode?
  3524.                 liens[lien_tot]->link_import=0;       // not import mode
  3525.                 liens[lien_tot]->depth=liens[ptr]->depth;
  3526.                 liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe);
  3527.                 liens[lien_tot]->retry=liens[ptr]->retry-1;    // one retry less!
  3528.                 liens[lien_tot]->premier=liens[ptr]->premier;
  3529.                 liens[lien_tot]->precedent=liens[ptr]->precedent;
  3530.                 lien_tot++;
  3531.               } else {  // oops, error: out of memory!!
  3532.                 printf("PANIC! : Not enough memory [%d]\n",__LINE__);
  3533.                 if (opt->errlog) {
  3534.                   fspc(opt->errlog,"panic"); 
  3535.                   fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
  3536.                   test_flush;
  3537.                 }
  3538.                 //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
  3539.                 XH_uninit;    // free memory & buffers
  3540.                 return 0;     // note: ENGINE_SAVE_CONTEXT() at the end of the function is not reached on this path
  3541.               } 
  3542.             }
  3543.           } else {
  3544.             if (opt->errlog) {
  3545.               if (opt->debug>1) {
  3546.                 fspc(opt->errlog,"info"); 
  3547.                 fprintf(opt->errlog,"Info: no robots.txt at %s%s"LF,urladr,urlfil);
  3548.               }
  3549.             }
  3550.           }
  3551.           if (!store_errpage) {   // unless the error page must be kept, drop the body and flag the link as failed
  3552.             if (r->adr) {     // free
  3553.               freet(r->adr); 
  3554.               r->adr=NULL; 
  3555.             }
  3556.             error=1;  // error!
  3557.           }
  3558.         }
  3559.         // END handling of 301,302,307..
  3560.         // ------------------------------------------------------------
  3561.  
  3562.   }  // if !error
  3563.  
  3564.  
  3565.   /* Apply changes */
  3566.   ENGINE_SAVE_CONTEXT();
  3567.  
  3568.   return 0;
  3569.  
  3570.  
  3571. }
  3572.  
  3573.  
  3574.  
  3575. /*
  3576. Wait for next file and
  3577. check 301, 302, .. statuscodes (moved)
  3578. */
  3579. int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
          /*
           * Waits until the current link (urladr/urlfil/savename, taken from the
           * engine context) is no longer being transferred in the backing stack,
           * then moves its response into r and releases the backing slot.
           *
           * Before waiting, this routine:
           *  - honours pause requests (hts-stop.lock in opt->path_log, or the
           *    opt->fragment byte limit): waits for running transfers, writes
           *    hts-paused.lock, then waits for that file to be removed (or hands
           *    over to hts_htmlcheck_pause() when HTS_ANALYSTE is set);
           *  - (HTS_ANALYSTE) records URLs queued in _hts_addurl, and idles while
           *    _hts_setpause is set or no socket can be plugged;
           *  - makes sure the current link is on the backing stack and refills
           *    the stack when sockets are available.
           *
           * str  : engine module state -- not referenced directly in this body;
           *        presumably consumed by ENGINE_LOAD_CONTEXT() (TODO confirm)
           * stre : extended engine state (exit flag, statistics counters, track
           *        file handle, location buffer)
           *
           * Returns 2 when the link is no longer on the backing stack (the caller
           * is expected to skip to the next link), 0 otherwise. On a user/shell
           * exit request *stre->exit_xh_ is set to 1 before returning 0; 0 is also
           * returned after a link-table allocation failure.
           */
  3580.   /* Load engine variables */
  3581.   ENGINE_LOAD_CONTEXT();
  3582.   /* */
  3583.   int b;
  3584.   int n;
  3585.
  3586. #if BDEBUG==1
  3587.   printf("\nBack test..\n");
  3588. #endif
  3589.
  3590.   // pause/lock files
  3591.   {
  3592.     int do_pause=0;
  3593.
  3594.     // user stop lockfile : create hts-stop.lock --> HTTrack will be paused
  3595.     if (fexist(fconcat(opt->path_log,"hts-stop.lock"))) {
  3596.       // remove lockfile
  3597.       remove(fconcat(opt->path_log,"hts-stop.lock"));
                // only pause if the lockfile could really be removed
  3598.       if (!fexist(fconcat(opt->path_log,"hts-stop.lock"))) {
  3599.         do_pause=1;
  3600.       }
  3601.     }
  3602.
  3603.     // after receiving N bytes, pause
  3604.     if (opt->fragment>0) {
  3605.       if ((HTS_STAT.stat_bytes-stat_fragment) > opt->fragment) {
  3606.         do_pause=1;
  3607.       }
  3608.     }
  3609.
  3610.     // pause?
  3611.     if (do_pause) {
  3612.       if ( (opt->debug>0) && (opt->log!=NULL) ) {
  3613.         fspc(opt->log,"info"); fprintf(opt->log,"engine: pause requested.."LF);
  3614.       }
  3615.       while (back_nsoc(back,back_max)>0) {                  // wait for the transfers to end
  3616.         back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
  3617.         Sleep(200);
  3618. #if HTS_ANALYSTE
  3619.         {
  3620.           back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
  3621.
  3622.           // Transfer rate
  3623.           engine_stats();
  3624.
  3625.           // Refresh various stats
  3626.           HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
  3627.           HTS_STAT.stat_errors=fspc(NULL,"error");
  3628.           HTS_STAT.stat_warnings=fspc(NULL,"warning");
  3629.           HTS_STAT.stat_infos=fspc(NULL,"info");
  3630.           HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
  3631.           HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
  3632.
  3633.           b=0;
  3634.           if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)
  3635.             || !back_checkmirror(opt)) {
  3636.               if (opt->errlog) {
  3637.                 fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF);
  3638.                 test_flush;
  3639.               }
  3640.               *stre->exit_xh_=1;  // exit requested
  3641.               XH_uninit;
  3642.               return 0;
  3643.             }
  3644.         }
  3645. #endif
  3646.       }
  3647.       // Free the buffer that records created paths, in case the user runs
  3648.       // an rm -r after a tar during the pause
  3649.       // structcheck_init(1);
  3650.       {
  3651.         FILE* fp = fopen(fconcat(opt->path_log,"hts-paused.lock"),"wb");
  3652.         if (fp) {
  3653.           fspc(fp,"info");  // timestamp
  3654.           fprintf(fp,"Pause"LF"HTTrack is paused after retreiving "LLintP" bytes"LF"Delete this file to continue the mirror..."LF""LF"",(LLint)HTS_STAT.stat_bytes);
  3655.           fclose(fp);
  3656.         }
  3657.       }
                // restart the fragment byte counter from the current total
  3658.       stat_fragment=HTS_STAT.stat_bytes;
  3659.       /* Info for wrappers */
  3660.       if ( (opt->debug>0) && (opt->log!=NULL) ) {
  3661.         fspc(opt->log,"info"); fprintf(opt->log,"engine: pause: %s"LF,fconcat(opt->path_log,"hts-paused.lock"));
  3662.       }
  3663. #if HTS_ANALYSTE
  3664.       hts_htmlcheck_pause(fconcat(opt->path_log,"hts-paused.lock"));
  3665. #else
                // stay paused for as long as the lockfile exists
  3666.       while (fexist(fconcat(opt->path_log,"hts-paused.lock"))) {
  3667.         //back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);   useless!! (no more active sockets)
  3668.         Sleep(1000);
  3669.       }
  3670. #endif
  3671.     }
  3672.     //
  3673.   }
  3674.   // end of pause/lock files
  3675.
  3676. #if HTS_ANALYSTE
  3677.   // change in preferences
  3678.   /*
  3679.   if (_hts_setopt) {
  3680.   copy_htsopt(_hts_setopt,opt);    // copy if needed
  3681.   _hts_setopt=NULL;                 // clear callback
  3682.   }
  3683.   */
            // URLs queued by the user interface: add each of them to the link table
  3684.   if (_hts_addurl) {
  3685.     char BIGSTK add_adr[HTS_URLMAXSIZE*2];
  3686.     char BIGSTK add_fil[HTS_URLMAXSIZE*2];
  3687.     while(*_hts_addurl) {
  3688.       char BIGSTK add_url[HTS_URLMAXSIZE*2];
  3689.       add_adr[0]=add_fil[0]=add_url[0]='\0';
  3690.       if (!link_has_authority(*_hts_addurl))
  3691.         strcpybuff(add_url,"http://");          // prepend http://
  3692.       strcatbuff(add_url,*_hts_addurl);
  3693.       if (ident_url_absolute(add_url,add_adr,add_fil)>=0) {
  3694.         // ---- Add ----
  3695.         // record NEW link
  3696.         char BIGSTK add_sav[HTS_URLMAXSIZE*2];
  3697.         // compute the save name and possibly modify address/file
  3698.         if (url_savename(add_adr,add_fil,add_sav,NULL,NULL,NULL,NULL,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe)!=-1) { 
  3699.           if (hash_read(hash,add_sav,"",0,0)<0) {      // does not exist yet
  3700.             // record link (MACRO)
  3701.             liens_record(add_adr,add_fil,add_sav,"","");
  3702.             if (liens[lien_tot]!=NULL) {    // OK, no error
  3703.               liens[lien_tot]->testmode=0;          // test mode?
  3704.               liens[lien_tot]->link_import=0;       // normal mode
  3705.               liens[lien_tot]->depth=opt->depth;
  3706.               liens[lien_tot]->pass2=max(0,numero_passe);
  3707.               liens[lien_tot]->retry=opt->retry;
  3708.               liens[lien_tot]->premier=lien_tot;
  3709.               liens[lien_tot]->precedent=lien_tot;
  3710.               lien_tot++;
  3711.               //
  3712.               if ((opt->debug>0) && (opt->log!=NULL)) {
  3713.                 fspc(opt->log,"info"); fprintf(opt->log,"Link added by user: %s%s"LF,add_adr,add_fil); test_flush;
  3714.               }
  3715.               //
  3716.             } else {  // oops, error: out of memory!!
  3717.               printf("PANIC! : Not enough memory [%d]\n",__LINE__);
  3718.               if (opt->errlog) {
                          // same "panic" prefix as the other out-of-memory log entries of this file
  3719.                 fspc(opt->errlog,"panic"); fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
  3720.                 test_flush;
  3721.               }
  3722.               //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
  3723.               XH_uninit;    // free memory & buffers
  3724.               return 0;
  3725.             }
  3726.           } else {
  3727.             if ( (opt->debug>0) && (opt->errlog!=NULL) ) {
  3728.               fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Existing link %s%s not added after user request"LF,add_adr,add_fil);
  3729.               test_flush;
  3730.             }
  3731.           }
  3732.
  3733.         }
  3734.       } else {
  3735.         if (opt->errlog) {
  3736.           fspc(opt->errlog,"error");
  3737.           fprintf(opt->errlog,"Error during URL decoding for %s"LF,add_url);
  3738.           test_flush;
  3739.         }
  3740.       }
  3741.       // ---- End add ----
  3742.       _hts_addurl++;                  // next one
  3743.     }
  3744.     _hts_addurl=NULL;           // release _hts_addurl
  3745.   }
  3746.   // if a pause has been requested
  3747.   if (_hts_setpause || back_pluggable_sockets_strict(back, back_max, opt) <= 0) {
  3748.     // index of the current link
            // NOTE(review): this local b shadows the function-level b declared above
  3749.     int b=back_index(back,back_max,urladr,urlfil,savename);
  3750.     int prev = _hts_in_html_parsing;
  3751.     if (b<0) b=0;    // force a valid index for the stats
  3752.     while(_hts_setpause || back_pluggable_sockets_strict(back, back_max, opt) <= 0) {    // pausing..
  3753.       _hts_in_html_parsing = 6;
  3754.       back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
  3755.
  3756.       // Transfer rate
  3757.       engine_stats();
  3758.
  3759.       // Refresh various stats
  3760.       HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
  3761.       HTS_STAT.stat_errors=fspc(NULL,"error");
  3762.       HTS_STAT.stat_warnings=fspc(NULL,"warning");
  3763.       HTS_STAT.stat_infos=fspc(NULL,"info");
  3764.       HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
  3765.       HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
  3766.
  3767.       if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
  3768.         if (opt->errlog) {
  3769.           fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF);
  3770.           test_flush;
  3771.         }
  3772.         *stre->exit_xh_=1;  // exit requested
  3773.         XH_uninit;
  3774.         return 0;
  3775.       }
  3776.       Sleep(100);  // pause
  3777.     }
            // restore the parsing state saved before the pause loop
  3778.     _hts_in_html_parsing = prev;
  3779.   }
  3780. #endif
  3781.
  3782.   // if the file is not in the backing stack, add it..
  3783.   if (!back_exist(back,back_max,urladr,urlfil,savename)) {
  3784. #if BDEBUG==1
  3785.     printf("crash backing: %s%s\n",liens[ptr]->adr,liens[ptr]->fil);
  3786. #endif
  3787.     if (back_add(back,back_max,opt,cache,urladr,urlfil,savename,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,liens[ptr]->testmode,&liens[ptr]->pass2)==-1) {
  3788.       printf("PANIC! : Crash adding error, unexpected error found.. [%d]\n",__LINE__);
  3789. #if BDEBUG==1
  3790.       printf("error while crash adding\n");
  3791. #endif
  3792.       if (opt->errlog) {
  3793.         fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unexpected backing error for %s%s"LF,urladr,urlfil);
  3794.         test_flush;
  3795.       } 
  3796.
  3797.     }
  3798.   }
  3799.
  3800. #if BDEBUG==1
  3801.   printf("test number of socks\n");
  3802. #endif
  3803.
  3804.   // add as many sockets as possible
  3805.   n=opt->maxsoc-back_nsoc(back,back_max);
  3806. #if BDEBUG==1
  3807.   printf("%d sockets available for backing\n",n);
  3808. #endif
  3809.
  3810. #if HTS_ANALYSTE
  3811.   if ((n>0) && (!_hts_setpause)) {   // if sockets are free and not paused, add
  3812. #else
  3813.   if (n>0) {                         // if sockets are free
  3814. #endif
  3815.     // fill the backing stack as much as possible
  3816.     back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
  3817.   }
  3818.
  3819.   // index of the current link
  3820.   /*
  3821.   b=back_index(back,back_max,urladr,urlfil,savename);
  3822.
  3823.   if (b>=0) 
  3824.   */
  3825.   {
  3826.     // ------------------------------------------------------------
  3827.     // wait until the current file is ready - WAIT LOOP
  3828.     do {
  3829.
  3830.       // index of the current link
  3831.       b=back_index(back,back_max,urladr,urlfil,savename);
  3832. #if BDEBUG==1
  3833.       printf("back index %d, waiting\n",b);
  3834. #endif
  3835.       // Leave the loop if the link is gone: 'continue' jumps to the while() test, which fails for b<0
  3836.       if (b<0)
  3837.         continue;
  3838.
  3839.       // Receive data
  3840.       if (back[b].status>0)
  3841.         back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
  3842.
  3843.       // Re-read the index and leave the loop if the link is no longer on the stack
  3844.       b=back_index(back,back_max,urladr,urlfil,savename);
  3845.       if (b<0)
  3846.         continue;
  3847.
  3848.       // Stop the mirror
  3849.       if (!back_checkmirror(opt)) {
  3850.         *stre->exit_xh_=1;  // exit requested
  3851.         XH_uninit;
  3852.         return 0;
  3853.       }
  3854.
  3855.       // And fill the backing stack
  3856.       if (back[b].status>0)
  3857.         back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
  3858.
  3859.       // Re-read the index and leave the loop if the link is no longer on the stack
  3860.       b=back_index(back,back_max,urladr,urlfil,savename);
  3861.       if (b<0)
  3862.         continue;
  3863.
  3864.       // other HTTrack duties: statistics, wait loop, etc.
  3865.       if ((opt->makestat) || (opt->maketrack)) {
  3866.         TStamp l=time_local();
                  // at most one statistics/track record per 60 time units (so the divisor below is >= 60)
  3867.         if ((int) (l-makestat_time) >= 60) {   
  3868.           if (makestat_fp != NULL) {
  3869.             fspc(makestat_fp,"info");
  3870.             fprintf(makestat_fp,"Rate= %d (/"LLintP") \11NewLinks= %d (/%d)"LF,(int) ((HTS_STAT.HTS_TOTAL_RECV-*stre->makestat_total_)/(l-makestat_time)), (LLint)HTS_STAT.HTS_TOTAL_RECV,(int) lien_tot-*stre->makestat_lnk_,(int) lien_tot);
  3871.             fflush(makestat_fp);
  3872.             *stre->makestat_total_=HTS_STAT.HTS_TOTAL_RECV;
  3873.             *stre->makestat_lnk_=lien_tot;
  3874.           }
  3875.           if (stre->maketrack_fp != NULL) {
  3876.             int i;
  3877.             fspc(stre->maketrack_fp,"info"); fprintf(stre->maketrack_fp,LF);
  3878.             for(i=0;i<back_max;i++) {
  3879.               back_info(back,i,3,stre->maketrack_fp);
  3880.             }
  3881.             fprintf(stre->maketrack_fp,LF);
  3882.             fflush(stre->maketrack_fp);
  3883.
  3884.           }
  3885.           makestat_time=l;
  3886.         }
  3887.       }
  3888. #if HTS_ANALYSTE
  3889.       {
  3890.         int i;
  3891.         {
                    // save name of a transfer the user asked to cancel, if any
  3892.           char* s=hts_cancel_file("");
  3893.           if (strnotempty(s)) {    // a file has to be cancelled
  3894.             for(i=0;i<back_max;i++) {
  3895.               if ((back[i].status>0)) {
  3896.                 if (strcmp(back[i].url_sav,s)==0) {  // found
                            // status 1000 is handled through the stop_ftp flag instead of closing the socket
  3897.                   if (back[i].status != 1000) {
  3898. #if HTS_DEBUG_CLOSESOCK
  3899.                     DEBUG_W("user cancel: deletehttp\n");
  3900. #endif
  3901.                     if (back[i].r.soc!=INVALID_SOCKET) deletehttp(&back[i].r);
  3902.                     back[i].r.soc=INVALID_SOCKET;
  3903.                     back[i].r.statuscode=-1;
  3904.                     strcpybuff(back[i].r.msg,"Cancelled by User");
  3905.                     back[i].status=0;  // finished
  3906.                   } else    // cancel ftp: raise the stop flag
  3907.                     back[i].stop_ftp = 1;
  3908.                 }
  3909.               }
  3910.             }
                      // clear the cancel request now that it has been processed
  3911.             s[0]='\0';
  3912.           }
  3913.         }
  3914.
  3915.         // Transfer rate
  3916.         engine_stats();
  3917.
  3918.         // Refresh various stats
  3919.         HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
  3920.         HTS_STAT.stat_errors=fspc(NULL,"error");
  3921.         HTS_STAT.stat_warnings=fspc(NULL,"warning");
  3922.         HTS_STAT.stat_infos=fspc(NULL,"info");
  3923.         HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
  3924.         HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
  3925.
  3926.         if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
  3927.           if (opt->errlog) {
  3928.             fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF);
  3929.             test_flush;
  3930.           } 
  3931.           *stre->exit_xh_=1;  // exit requested
  3932.           XH_uninit;
  3933.           return 0;
  3934.         }
  3935.       }
  3936.
  3937. #endif
  3938. #if HTS_POLL
  3939.       if ((opt->shell) || (opt->keyboard) || (opt->verbosedisplay) || (!opt->quiet)) {
  3940.         TStamp tl;
  3941.         *stre->info_shell_=1;
  3942.
  3943.         /* Toggle with ENTER */
  3944.         if (!opt->quiet) {
  3945.           if (check_stdin()) {
  3946.             char com[256];
  3947.             linput(stdin,com,200);
  3948.             if (opt->verbosedisplay==2)
  3949.               opt->verbosedisplay=1;
  3950.             else
  3951.               opt->verbosedisplay=2;
  3952.             /* Info for wrappers */
  3953.             if ( (opt->debug>0) && (opt->log!=NULL) ) {
  3954.               fspc(opt->log,"info"); fprintf(opt->log,"engine: change-options"LF);
  3955.             }
  3956. #if HTS_ANALYSTE
  3957.             hts_htmlcheck_chopt(opt);
  3958. #endif
  3959.           }
  3960.         }
  3961.
  3962.         tl=time_local();
  3963.
  3964.         // generate an information message about the current state
  3965.         if (opt->shell) {    // if shell
  3966.           if ((tl-*stre->last_info_shell_)>0) {    // every second
  3967.             FILE* fp=stdout;
  3968.             int a=0;
  3969.             *stre->last_info_shell_=tl;
  3970.             if (fexist(fconcat(opt->path_log,"hts-autopsy"))) {  // debugging: test whether the robot is alive
  3971.               // (yes, I know, a living robot.. but well.. it has the right to live too)
  3972.               // (free the slave robots of the Internet!)
  3973.               remove(fconcat(opt->path_log,"hts-autopsy"));
  3974.               fp=fopen(fconcat(opt->path_log,"hts-isalive"),"wb");
  3975.               if (fp!=NULL) a=1; else fp=stdout;   // fopen() may fail: fall back to stdout, and never fclose() it
  3976.             }
  3977.             if ((*stre->info_shell_) || a) {
  3978.               int i,j;
  3979.
  3980.               fprintf(fp,"TIME %d"LF,(int) (tl-HTS_STAT.stat_timestart));
  3981.               fprintf(fp,"TOTAL %d"LF,(int) HTS_STAT.stat_bytes);
                        // report a rate of 0 while no time has elapsed, instead of dividing by zero
  3982.               fprintf(fp,"RATE %d"LF,(tl>HTS_STAT.stat_timestart)?(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart)):0);
  3983.               fprintf(fp,"SOCKET %d"LF,back_nsoc(back,back_max));
  3984.               fprintf(fp,"LINK %d"LF,lien_tot);
  3985.               {
                          // sum of r.size over the backing slots that hold a buffer (r.adr set)
  3986.                 LLint mem=0;
  3987.                 for(i=0;i<back_max;i++)
  3988.                   if (back[i].r.adr!=NULL)
  3989.                     mem+=back[i].r.size;
  3990.                 fprintf(fp,"INMEM "LLintP""LF,(LLint)mem);
  3991.               }
  3992.               for(j=0;j<2;j++) {  // passes for ready and wait
  3993.                 for(i=0;i<back_max;i++) {
                            // NOTE(review): written to stdout even when fp is the hts-isalive file -- confirm whether fp was intended
  3994.                   back_info(back,i,j+1,stdout);    // maketrack_fp instead of stdout ?? // **
  3995.                 }
  3996.               }
  3997.               fprintf(fp,LF);
  3998.               if (a)
  3999.                 fclose(fp);
  4000.               io_flush;
  4001.             }
  4002.           }
  4003.         }  // if shell
  4004.
  4005.       }  // if shell or keyboard (option)
  4006.       //
  4007. #endif
              // loop while the link is still on the stack and its status is still >0
  4008.     } while((b>=0) && (back[max(b,0)].status>0));
  4009.
  4010.
  4011.     // If link not found on the stack, it's because it has already been downloaded
  4012.     // in background
  4013.     // Then, skip it and go to the next one
  4014.     if (b<0) {
  4015.       if ((opt->debug>1) && (opt->log!=NULL)) {
  4016.         fspc(opt->log,"debug"); fprintf(opt->log,"link #%d is ready, no more on the stack, skipping: %s%s.."LF,ptr,urladr,urlfil);
  4017.         test_flush;
  4018.       }
  4019.
  4020.       // next link
  4021.       // ptr++;
  4022.
  4023.       return 2; // goto jump_if_done;
  4024.
  4025.     }
  4026. #if 0
  4027.     /* FIXME - finalized HAS NO MORE THIS MEANING */
  4028.     /* link put in cache by the backing system for memory spare - reclaim */
  4029.     else if (back[b].finalized) {
  4030.       assertf(back[b].r.adr == NULL);
  4031.       /* read file in cache */
  4032.       back[b].r = cache_read_ro(opt,cache,back[b].url_adr,back[b].url_fil,back[b].url_sav, back[b].location_buffer);
  4033.       /* ensure correct location buffer set */
  4034.       back[b].r.location=back[b].location_buffer;
  4035.       if (back[b].r.statuscode == -1) {
  4036.         if (opt->errlog) {
  4037.           fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unexpected error: %s%s not found anymore in cache"LF,back[b].url_adr,back[b].url_fil);
  4038.           test_flush;
  4039.         }
  4040.       } else {
  4041.         if ( (opt->debug>1) && (opt->log!=NULL) ) {
  4042.           fspc(opt->log,"debug"); fprintf(opt->log,"reclaim file %s%s (%d)"LF,back[b].url_adr,back[b].url_fil,back[b].r.statuscode); test_flush;
  4043.         }
  4044.       }
  4045.     }
  4046. #endif
  4047.
  4048. #if HTS_ANALYSTE==2
  4049. #else
  4050.     //if (!opt->quiet) {  // small animation
  4051.     if (!opt->verbosedisplay) {
  4052.       if (!opt->quiet) {
                  // four-state spinner, advanced once per call
  4053.         static int roll=0;  /* static: ok */
  4054.         roll=(roll+1)%4;
  4055.         printf("%c\x0d",("/-\\|")[roll]);
  4056.         fflush(stdout);
  4057.       }
  4058.     } else if (opt->verbosedisplay==1) {
  4059.       if (back[b].r.statuscode==200)
  4060.         printf("%d/%d: %s%s ("LLintP" bytes) - OK\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,(LLint)back[b].r.size);
  4061.       else
  4062.         printf("%d/%d: %s%s ("LLintP" bytes) - %d\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,(LLint)back[b].r.size,back[b].r.statuscode);
  4063.       fflush(stdout);
  4064.     }
  4065.     //}
  4066. #endif
  4067.     // ------------------------------------------------------------
  4068.     // Integrity checker
  4069. #if DEBUG_CHECKINT
  4070.     _CHECKINT(&back[b],"Retour de back_wait, aprΦs le while")
  4071.     {
  4072.       int i;
  4073.       for(i=0;i<back_max;i++) {
  4074.         char si[256];
  4075.         sprintf(si,"Test global aprΦs back_wait, index %d",i);
  4076.         _CHECKINT(&back[i],si)
  4077.       }
  4078.     }
  4079. #endif
  4080.
  4081.     // copy the htsblk response structure
  4082.     memcpy(r, &(back[b].r), sizeof(htsblk));
  4083.     r->location=stre->loc_;    // do NOT copy location!! it is an address, not a buffer
              // NOTE(review): when the slot has no location, stre->loc_ keeps its previous content -- confirm this is intended
  4084.     if (back[b].r.location) 
  4085.       strcpybuff(r->location,back[b].r.location);
  4086.     back[b].r.adr=NULL;    // r now refers to the buffer: keep the slot release below from freeing it
  4087.
  4088.     // release the backing slot
  4089.     back_maydelete(opt,cache,back,b);
  4090.
  4091.     // progress
  4092. #if 0
  4093.     if (opt->aff_progress) {
  4094.       TStamp tl=time_local();
  4095.       if ((tl-HTS_STAT.stat_timestart)>0) {
  4096.         char s[32];
  4097.         int i=0;
  4098.         lastime=tl;
  4099.         _CLRSCR; _GOTOXY("1","1");
  4100.         printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart)));
  4101.         while(i<minimum(back_max,99)) {  // **
  4102.           if (back[i].status>=0) {  // loading..
  4103.             s[0]='\0';
  4104.             if (strlen(back[i].url_fil)>16)
  4105.               strcatbuff(s,back[i].url_fil+strlen(back[i].url_fil)-16);       
  4106.             else
  4107.               strncatbuff(s,back[i].url_fil,16);
  4108.             printf("%s : ",s);
  4109.
  4110.             printf("[");
  4111.             if (back[i].r.totalsize>0) {
  4112.               int p;
  4113.               int j;
  4114.               p=(int)((back[i].r.size*10)/back[i].r.totalsize);
  4115.               p=minimum(10,p);
  4116.               for(j=0;j<p;j++) printf("*");
  4117.               for(j=0;j<(10-p);j++) printf("-");
  4118.             } else { 
  4119.               printf(LLintP,(LLint)back[i].r.size);                      
  4120.             }
  4121.             printf("]");
  4122.
  4123.             //} else if (back[i].status==0) {
  4124.             //  strcpybuff(s,"ENDED");
  4125.           } 
  4126.           printf("\n");
  4127.           i++;
  4128.         }
  4129.         io_flush;
  4130.       }
  4131.     }
  4132. #endif
  4133.
  4134.     // graphical debug
  4135. #if BDEBUG==2
  4136.     {
  4137.       char s[12];
  4138.       int i=0;
  4139.       _GOTOXY(1,1);
  4140.       printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(time_local()-HTS_STAT.stat_timestart)));
  4141.       while(i<minimum(back_max,160)) {
  4142.         if (back[i].status>0) {
  4143.           sprintf(s,"%d",back[i].r.size);
  4144.         } else if (back[i].status==0) {
  4145.           strcpybuff(s,"ENDED");
  4146.         } else 
  4147.           strcpybuff(s,"   -   ");
  4148.         while(strlen(s)<8) strcatbuff(s," ");
  4149.         printf("%s",s); io_flush;
  4150.         i++;
  4151.       }
  4152.     }
  4153. #endif
  4154.
  4155.
  4156. #if BDEBUG==1
  4157.     printf("statuscode=%d with %s / msg=%s\n",r->statuscode,r->contenttype,r->msg);
  4158. #endif
  4159.
  4160.   }
  4161.   /*else {
  4162.   #if BDEBUG==1
  4163.   printf("back index error\n");
  4164.   #endif
  4165.   }
  4166.   */
  4167.
  4168.
  4169.
            /* Apply changes */
  4170.   ENGINE_SAVE_CONTEXT();
  4171.
  4172.   return 0;
  4173.
  4174.
  4175. }
  4176.  
  4177.  
  4178.